|
import pandas as pd |
|
import streamlit as st |
|
from sklearn.cluster import DBSCAN |
|
from sklearn.preprocessing import StandardScaler |
|
import numpy as np |
|
|
|
|
|
def process(data):
    """Cluster the first DataFrame in ``data`` with DBSCAN and label its rows.

    Every column is standardised with ``StandardScaler`` before clustering.
    The DBSCAN neighbourhood radius (``eps``) is chosen interactively through
    a Streamlit slider ranging from 0.01 to 5.0.

    Args:
        data: An indexable whose first element is the pandas DataFrame to
            cluster (only ``data[0]`` is read here).

    Returns:
        A copy of ``data[0]`` with an extra ``cluster`` column holding the
        DBSCAN label of each row, or ``None`` (after showing an info message)
        when the frame contains a non-numeric column. The input frame itself
        is left unmodified.
    """
    frame = data[0]

    # Reject every non-numeric column up front (not only ``object`` ones), so
    # datetime/string/categorical data gets the user-facing message instead of
    # an exception from the scaler.
    if not all(pd.api.types.is_numeric_dtype(dtype) for dtype in frame.dtypes):
        st.info('This Algorithm can only process numerical data')
        return None

    # StandardScaler standardises each feature independently, so a single
    # fit over the whole frame is equivalent to fitting column by column.
    scaled = StandardScaler().fit_transform(frame)

    max_distance = st.slider(
        "Maximum distance between two samples for one to be considered\n"
        "as in the neighborhood of the other. :",
        0.01,
        5.0,
    )

    labels = DBSCAN(eps=max_distance).fit_predict(scaled)

    # Attach the labels to a copy: writing into the caller's frame would make
    # a later run on the same frame treat the old ``cluster`` column as a
    # feature.
    return frame.assign(cluster=labels)