Sujatha committed on
Commit
f18d844
·
verified ·
1 Parent(s): dd1d75f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, Birch, MeanShift
5
+ from sklearn_extra.cluster import KMedoids
6
+ from sklearn.mixture import GaussianMixture
7
+ from sklearn.decomposition import PCA
8
+ from scipy.cluster.hierarchy import linkage, dendrogram
9
+ import matplotlib.pyplot as plt
10
+ import skfuzzy as fuzz # For Fuzzy C-Means
11
+ import hdbscan
12
+
13
def apply_clustering(algorithm, n_clusters, dataset):
    """Cluster an uploaded CSV dataset and render the result as an image.

    Args:
        algorithm: Display name of the clustering algorithm, as offered by
            the UI dropdown (e.g. ``"KMeans"``, ``"DBSCAN"``).
        n_clusters: Requested number of clusters. Coerced to ``int`` because
            UI sliders may deliver floats. DBSCAN, HDBSCAN, Mean-Shift and
            the hierarchical dendrogram do not use it.
        dataset: The uploaded CSV. Accepts a path string, a file-like
            object, or an object exposing the path as ``.name``; ``None``
            means nothing was uploaded.

    Returns:
        A ``(message, image_path)`` tuple. ``image_path`` is ``None`` when
        no plot could be produced (missing/unusable data or an unknown
        algorithm). For the hierarchical option the image is a dendrogram;
        for every other algorithm it is a 2-D PCA scatter plot coloured by
        cluster label.
    """
    if dataset is None:
        return "Please upload a CSV dataset first.", None

    data_matrix = _load_numeric_matrix(dataset)
    # PCA to two components needs at least two samples and two features.
    if data_matrix.shape[0] < 2 or data_matrix.shape[1] < 2:
        return (
            "The dataset needs at least two numeric columns and two complete rows.",
            None,
        )

    n_clusters = int(n_clusters)

    if algorithm == "Agglomerative Hierarchical Clustering (AHC)":
        # The hierarchical option is visualised as a dendrogram built from
        # the Ward linkage, so no flat labels or PCA projection are needed.
        image_path = _save_dendrogram(data_matrix)
        return "Agglomerative clustering applied successfully.", image_path

    labels = _fit_labels(algorithm, n_clusters, data_matrix)
    if labels is None:
        return "Algorithm not supported yet.", None

    image_path = _save_cluster_plot(algorithm, data_matrix, labels)
    return f"{algorithm} clustering applied successfully.", image_path


def _load_numeric_matrix(dataset):
    """Read the CSV and return its numeric, NaN-free content as a float array."""
    # Depending on the Gradio version/config, the upload arrives either as a
    # path string or as a temp-file wrapper exposing the path via ``.name``.
    csv_source = getattr(dataset, "name", dataset)
    frame = pd.read_csv(csv_source)
    numeric = frame.select_dtypes(include=["number", "bool"])
    # Drop entirely empty columns first so they do not wipe out every row.
    numeric = numeric.dropna(axis=1, how="all").dropna()
    return numeric.to_numpy(dtype=float)


def _fit_labels(algorithm, n_clusters, data_matrix):
    """Return one cluster label per row, or ``None`` for an unknown algorithm."""
    if algorithm == "KMeans":
        return KMeans(n_clusters=n_clusters, random_state=42).fit_predict(data_matrix)
    if algorithm == "KMedoid (PAM)":
        model = KMedoids(n_clusters=n_clusters, method="pam", random_state=42)
        return model.fit_predict(data_matrix)
    if algorithm == "Fuzzy C-Means (FCM)":
        # skfuzzy expects features in rows, hence the transpose. The second
        # return value is the membership matrix (clusters x samples).
        _, membership, *_ = fuzz.cmeans(
            data_matrix.T, n_clusters, 2, error=0.005, maxiter=1000, seed=42
        )
        # Harden the fuzzy assignment: pick the cluster of highest membership.
        return np.argmax(membership, axis=0)
    if algorithm == "BIRCH":
        return Birch(n_clusters=n_clusters).fit_predict(data_matrix)
    if algorithm == "DBSCAN":
        return DBSCAN(eps=0.5, min_samples=5).fit_predict(data_matrix)
    if algorithm == "HDBSCAN":
        return hdbscan.HDBSCAN(min_samples=5).fit_predict(data_matrix)
    if algorithm == "Mean-Shift":
        return MeanShift().fit_predict(data_matrix)
    if algorithm == "Gaussian Mixture Models (GMM)":
        # Seeded like the other stochastic models so reruns are reproducible.
        model = GaussianMixture(n_components=n_clusters, random_state=42)
        return model.fit(data_matrix).predict(data_matrix)
    return None


def _save_dendrogram(data_matrix, path="dendrogram.png"):
    """Draw the Ward-linkage dendrogram of the data and save it to ``path``."""
    # 'ward' minimizes the variance of the clusters being merged.
    merges = linkage(data_matrix, "ward")
    fig = plt.figure(figsize=(10, 7))
    try:
        dendrogram(merges)
        plt.title("Dendrogram for Agglomerative Clustering")
        plt.xlabel("Sample index")
        plt.ylabel("Distance")
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    return path


def _save_cluster_plot(algorithm, data_matrix, labels, path="clusters_plot.png"):
    """Project the data to 2-D with PCA, colour by label, and save to ``path``."""
    projected = PCA(n_components=2).fit_transform(data_matrix)
    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(projected[:, 0], projected[:, 1], c=labels, cmap="viridis", s=50)
        plt.colorbar(label="Cluster Label")
        plt.title(f"Clusters Visualization ({algorithm})")
        plt.xlabel("PCA Component 1")
        plt.ylabel("PCA Component 2")
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    return path
80
+
81
+ # Gradio Interface
82
def main_interface():
    """Assemble the Gradio clustering demo and start serving it.

    The interface wires three inputs (algorithm choice, cluster count and
    the uploaded CSV) to ``apply_clustering`` and shows its text message and
    image result. The call blocks in ``launch`` with debug mode enabled.
    """
    algorithm_names = [
        "KMeans",
        "KMedoid (PAM)",
        "Fuzzy C-Means (FCM)",
        "Agglomerative Hierarchical Clustering (AHC)",
        "BIRCH",
        "DBSCAN",
        "HDBSCAN",
        "Mean-Shift",
        "Gaussian Mixture Models (GMM)",
    ]

    # Input widgets; the order in the list below must match the parameter
    # order of apply_clustering(algorithm, n_clusters, dataset).
    csv_upload = gr.File(label="Upload Dataset (CSV format)")
    algorithm_picker = gr.Dropdown(choices=algorithm_names, label="Select Algorithm")
    cluster_count = gr.Slider(
        minimum=2,
        maximum=10,
        value=3,
        step=1,
        label="Number of Clusters (for applicable algorithms)",
    )

    # Output widgets for the (message, image path) pair returned by the callback.
    result_box = gr.Textbox(label="Result")
    result_plot = gr.Image(label="Cluster Visualization")

    app = gr.Interface(
        fn=apply_clustering,
        inputs=[algorithm_picker, cluster_count, csv_upload],
        outputs=[result_box, result_plot],
    )
    app.launch(debug=True)
108
+
109
+ # Run the application
110
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_interface()