Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, Birch, MeanShift
|
5 |
+
from sklearn_extra.cluster import KMedoids
|
6 |
+
from sklearn.mixture import GaussianMixture
|
7 |
+
from sklearn.decomposition import PCA
|
8 |
+
from scipy.cluster.hierarchy import linkage, dendrogram
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
import skfuzzy as fuzz # For Fuzzy C-Means
|
11 |
+
import hdbscan
|
12 |
+
|
13 |
+
def _fit_labels(algorithm, n_clusters, data_matrix):
    """Return one cluster label per row of *data_matrix*, or None.

    None is returned when *algorithm* is not one of the names handled here,
    so the caller can report an unsupported choice.
    """
    if algorithm == "Fuzzy C-Means (FCM)":
        # cmeans is fed the transposed matrix; the second returned value is
        # the membership matrix, reduced to a hard label with argmax.
        _, membership, *_ = fuzz.cmeans(
            data_matrix.T, n_clusters, 2, error=0.005, maxiter=1000
        )
        return np.argmax(membership, axis=0)

    if algorithm == "Gaussian Mixture Models (GMM)":
        model = GaussianMixture(n_components=n_clusters)
        model.fit(data_matrix)
        return model.predict(data_matrix)

    if algorithm == "KMeans":
        model = KMeans(n_clusters=n_clusters, random_state=42)
    elif algorithm == "KMedoid (PAM)":
        model = KMedoids(n_clusters=n_clusters, method='pam', random_state=42)
    elif algorithm == "BIRCH":
        model = Birch(n_clusters=n_clusters)
    elif algorithm == "DBSCAN":
        model = DBSCAN(eps=0.5, min_samples=5)
    elif algorithm == "HDBSCAN":
        model = hdbscan.HDBSCAN(min_samples=5)
    elif algorithm == "Mean-Shift":
        model = MeanShift()
    else:
        return None
    return model.fit_predict(data_matrix)


def apply_clustering(algorithm, n_clusters, dataset):
    """Cluster an uploaded CSV dataset and save a visualization image.

    Parameters
    ----------
    algorithm : str
        Name of the clustering algorithm, as listed in the UI dropdown.
    n_clusters : int or float
        Requested number of clusters; coerced to ``int``. It is passed to
        KMeans, KMedoids, Fuzzy C-Means, BIRCH and GMM only; the other
        algorithms ignore it.
    dataset : str or object with a ``name`` attribute
        Path to the CSV file, or an upload wrapper whose ``name`` is the path.

    Returns
    -------
    tuple
        ``(message, image_path)``. ``image_path`` is ``"dendrogram.png"`` for
        agglomerative clustering, ``"clusters_plot.png"`` for the other
        supported algorithms, and ``None`` when nothing could be drawn
        (missing/unreadable/unusable dataset or unsupported algorithm).
    """
    if dataset is None:
        return "Please upload a CSV dataset.", None

    # Accept both a plain path string and an upload object exposing ``.name``.
    csv_path = dataset if isinstance(dataset, str) else dataset.name
    try:
        data = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        return f"Could not read the dataset as CSV: {exc}", None

    # Only numeric data can be clustered; rows with missing values are dropped
    # because PCA and the estimators used below reject NaN.
    numeric = data.select_dtypes(include=["number", "bool"]).dropna()
    if numeric.shape[0] < 2 or numeric.shape[1] < 2:
        return (
            "Dataset must contain at least two numeric columns "
            "and two complete rows.",
            None,
        )
    data_matrix = numeric.to_numpy(dtype=float)
    n_clusters = int(n_clusters)

    if algorithm == "Agglomerative Hierarchical Clustering (AHC)":
        # The dendrogram is the only output of this branch, so no flat
        # cluster labels are computed.
        Z = linkage(data_matrix, 'ward')  # 'ward' minimizes variance of merged clusters
        fig = plt.figure(figsize=(10, 7))
        try:
            dendrogram(Z)
            plt.title("Dendrogram for Agglomerative Clustering")
            plt.xlabel("Sample index")
            plt.ylabel("Distance")
            plt.tight_layout()
            plt.savefig("dendrogram.png")
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close(fig)
        return "Agglomerative clustering applied successfully.", "dendrogram.png"

    labels = _fit_labels(algorithm, n_clusters, data_matrix)
    if labels is None:
        return "Algorithm not supported yet.", None

    # Project to two dimensions purely for plotting; clustering itself ran on
    # the full feature matrix.
    reduced_data = PCA(n_components=2).fit_transform(data_matrix)

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, cmap='viridis', s=50)
        plt.colorbar(label='Cluster Label')
        plt.title(f"Clusters Visualization ({algorithm})")
        plt.xlabel("PCA Component 1")
        plt.ylabel("PCA Component 2")
        plt.tight_layout()
        plt.savefig("clusters_plot.png")
    finally:
        plt.close(fig)

    return f"{algorithm} clustering applied successfully.", "clusters_plot.png"
|
80 |
+
|
81 |
+
# Gradio Interface
|
82 |
+
def main_interface():
    """Build the Gradio UI for the clustering demo and launch it.

    The form has a CSV upload, an algorithm dropdown and a cluster-count
    slider as inputs, and a text box plus an image as outputs. Submissions
    are handled by ``apply_clustering``; the interface is launched with
    ``debug=True``.
    """
    csv_upload = gr.File(label="Upload Dataset (CSV format)")

    algorithm_names = [
        "KMeans",
        "KMedoid (PAM)",
        "Fuzzy C-Means (FCM)",
        "Agglomerative Hierarchical Clustering (AHC)",
        "BIRCH",
        "DBSCAN",
        "HDBSCAN",
        "Mean-Shift",
        "Gaussian Mixture Models (GMM)",
    ]
    algorithm_picker = gr.Dropdown(choices=algorithm_names, label="Select Algorithm")

    cluster_count = gr.Slider(
        minimum=2,
        maximum=10,
        value=3,
        step=1,
        label="Number of Clusters (for applicable algorithms)",
    )

    result_box = gr.Textbox(label="Result")
    plot_view = gr.Image(label="Cluster Visualization")

    # Input order must match the parameter order of apply_clustering.
    demo = gr.Interface(
        fn=apply_clustering,
        inputs=[algorithm_picker, cluster_count, csv_upload],
        outputs=[result_box, plot_view],
    )
    demo.launch(debug=True)
|
108 |
+
|
109 |
+
# Run the application
|
110 |
+
if __name__ == "__main__":
    # Start the UI only when this file is executed directly, not on import.
    main_interface()
|