# Source: cluster-analysis-dashboard/app.py at main · newdeskarl/cluster-analysis-dashboard · GitHub
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 11 14:11:12 2025

@author: Newdeskarl
"""

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Page-level settings: the same text is used for the browser tab and the heading
_DASHBOARD_TITLE = "Cluster Analysis Dashboard"

st.set_page_config(layout="wide", page_icon="📊", page_title=_DASHBOARD_TITLE)

# Main heading at the top of the page
st.title(_DASHBOARD_TITLE)

# Load data
@st.cache_data
def load_data(path: str = "pi_gouv_avec_cluster.xlsx") -> pd.DataFrame:
    """Read the clustered dataset from an Excel workbook.

    The function is wrapped in ``st.cache_data`` so the workbook is not
    re-read on every Streamlit rerun.

    Args:
        path: Location of the Excel file. Defaults to the workbook this
            dashboard was written for; a relative path is resolved by
            ``pandas.read_excel`` against the current working directory.

    Returns:
        The workbook contents as returned by ``pandas.read_excel``.

    Raises:
        FileNotFoundError: If no file exists at ``path``.
    """
    return pd.read_excel(path)

# Main dashboard flow: load the data, render the three filter widgets, then
# draw a pie chart and a statistics table for the selected column.
try:
    df = load_data()

    # Create columns for filters
    col1, col2, col3 = st.columns(3)

    # These columns are used as filters or left out of the analysis, so they
    # are not offered in the "column to analyze" dropdown.
    exclude_cols = ['NomPI', 'Lat', 'Long', 'kmeans_clusters']
    columns = [col for col in df.columns if col not in exclude_cols]

    # Cluster labels are handled as strings on both the widget side and the
    # filter side. Converting the selection back with int() fails for float
    # labels ("0.0"), missing values ("nan") or textual labels.
    cluster_options = list(df['kmeans_clusters'].astype(str).unique())

    with col1:
        selected_column = st.selectbox(
            "Select Column to Analyze:",
            columns
        )

    with col2:
        selected_pi = st.selectbox(
            "Select NomPI:",
            ["All"] + list(df['NomPI'].unique())
        )

    with col3:
        selected_cluster = st.selectbox(
            "Select Cluster:",
            ["All"] + cluster_options
        )

    # Filter data based on selections. Boolean indexing returns a new frame,
    # so the loaded DataFrame is never modified and no explicit copy is needed.
    filtered_df = df

    if selected_pi != "All":
        filtered_df = filtered_df[filtered_df['NomPI'] == selected_pi]

    if selected_cluster != "All":
        cluster_as_text = filtered_df['kmeans_clusters'].astype(str)
        filtered_df = filtered_df[cluster_as_text == selected_cluster]

    # Frequency of each distinct value in the chosen column
    value_counts = filtered_df[selected_column].value_counts()

    if value_counts.empty:
        # Nothing to plot: either the filters matched no rows or the column
        # holds only missing values. Say so instead of drawing a blank chart.
        st.warning("No data to display for the current selection.")
    else:
        # Create pie chart
        fig = go.Figure(data=[go.Pie(
            labels=value_counts.index,
            values=value_counts.values,
            hole=0.3,
            textinfo='label+percent',
            hovertemplate="<b>%{label}</b><br>" +
                          "Count: %{value}<br>" +
                          "Percentage: %{percent}<extra></extra>"
        )])

        # Build the chart title so it reflects the active filters
        title = f"Distribution of {selected_column}"
        if selected_pi != "All":
            title += f" for {selected_pi}"
        if selected_cluster != "All":
            title += f" in Cluster {selected_cluster}"

        fig.update_layout(
            title=title,
            showlegend=True,
            height=600
        )

        # Display the chart
        st.plotly_chart(fig, use_container_width=True)

        # Display detailed statistics
        st.subheader("Detailed Statistics")

        # One row per distinct value with its count and share of the total
        stats_df = pd.DataFrame({
            'Value': value_counts.index,
            'Count': value_counts.values,
            'Percentage': (value_counts.values / value_counts.sum() * 100).round(2)
        })

        # Display the statistics in a nice format
        st.dataframe(
            stats_df.style.format({'Percentage': '{:.2f}%'}),
            hide_index=True
        )

except FileNotFoundError as e:
    # Only a missing workbook warrants the "check the file location" hint.
    st.error(f"An error occurred: {str(e)}")
    st.info("Please make sure the file 'pi_gouv_avec_cluster.xlsx' is in the same directory as this script.")
except Exception as e:
    # Top-level boundary of the script: report any other failure in the UI
    # without pointing the user at an unrelated cause.
    st.error(f"An error occurred: {str(e)}")

# Footer: a horizontal rule followed by short usage instructions
for _footer_part in (
    "---",
    """
### How to use this dashboard:
1. Select a column to analyze from the first dropdown
2. Optionally filter by specific NomPI
3. Optionally filter by specific cluster
4. The pie chart and statistics will update automatically
""",
):
    st.markdown(_footer_part)