MatthiasPi committed
Commit a3171a2
Parent(s): 09bf6bf
commit the whole project
- .gitignore +1 -0
- README.md +21 -0
- algos/classification/logistic.py +31 -0
- algos/classification/nnclassifier.py +29 -0
- algos/classification/svmclassifier.py +30 -0
- algos/clustering/dbscan.py +26 -0
- algos/clustering/kmeans.py +24 -0
- algos/clustering/kproto.py +44 -0
- algos/others/others_page.py +4 -0
- algos/regression/elasticnet.py +43 -0
- algos/regression/linR.py +41 -0
- algos/regression/ridge.py +37 -0
- analysis/exploration.py +20 -0
- analysis/preprocessing.py +113 -0
- app.py +49 -0
- carott.png +0 -0
- requirements.txt +10 -0
- utilities/components.py +118 -0
- utilities/land.py +35 -0
- utilities/standard_template.py +58 -0
- utilities/template_helpers.py +9 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+**/__pycache__/
README.md
ADDED
@@ -0,0 +1,21 @@
+# AIViz
+Software Engineering Project ESILV DIA 1
+
+Web application allowing the user to perform Machine Learning on their own datasets. Results are then displayed through dynamic visualizations, and can be downloaded.
+
+To run the app locally:
+
+- Make sure to have Python 3.10+
+- Install the app dependencies:
+
+```
+pip install -r requirements.txt
+```
+
+- Run the app with Streamlit:
+
+```
+streamlit run app.py
+```
+
+AIViz is also accessible <a href="https://clementcornet-aiviz-app-n0g5vp.streamlit.app/">online</a>.
algos/classification/logistic.py
ADDED
@@ -0,0 +1,31 @@
+import streamlit as st
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from types import NoneType
+
+def process(data):
+    if type(data[0]) == NoneType or type(data[1]) == NoneType: # if either training or testing dataset is still missing
+        st.info('Please Upload Data')
+        return None
+
+    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+        st.info('Please Upload Numerical Data.')
+        return None
+    #st.write(data[0].dtypes)
+
+    x_train = data[0].iloc[:,:-1]  # features: every column but the last
+    y_train = data[0].iloc[:,-1]   # target: last column of the training set
+    #st.write(x_train.shape)
+    x_test = data[1].iloc[:,:x_train.shape[1]]
+    #st.dataframe(data[1])
+    #st.write(x_test.shape)
+
+    if len(x_train.columns) != len(x_test.columns):
+        st.info('Training and testing datasets have a different number of columns, cannot perform classification.')
+        return None
+
+    clf = LogisticRegression(random_state=0).fit(x_train, y_train)
+    #clf.fit(x_train, y_train)
+    pred = clf.predict(x_test)
+    x_test[data[0].columns[-1]] = pred  # append predictions under the target column name
+    return x_test
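All three classification modules in this commit share the same contract: `data` is a `(train, test)` tuple of DataFrames, the training frame's last column is the target, and the test frame carries the same feature columns. A minimal sketch of calling one of them (the toy frames and column names here are invented for illustration):

```python
import pandas as pd
import algos.classification.logistic as logistic

# Hypothetical training set: two numeric features, target class in the last column.
train = pd.DataFrame({
    'x1': [0.1, 0.4, 0.9, 1.2],
    'x2': [1.0, 0.8, 0.2, 0.1],
    'label': [0, 0, 1, 1],
})
# Test set: same feature columns, no target column.
test = pd.DataFrame({'x1': [0.2, 1.1], 'x2': [0.9, 0.15]})

# process() returns the test frame with a predicted 'label' column appended.
result = logistic.process((train, test))
print(result)
```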
algos/classification/nnclassifier.py
ADDED
@@ -0,0 +1,29 @@
+import streamlit as st
+from sklearn.neural_network import MLPClassifier
+import pandas as pd
+from types import NoneType
+
+def process(data):
+    if type(data[0]) == NoneType or type(data[1]) == NoneType: # if either training or testing dataset is still missing
+        st.info('Please Upload Data')
+        return None
+    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+        st.info('Please Upload Numerical Data.')
+        return None
+    x_train = data[0].iloc[:,:-1]  # features: every column but the last
+    y_train = data[0].iloc[:,-1]   # target: last column of the training set
+    #st.write(x_train.shape)
+    x_test = data[1].iloc[:,:x_train.shape[1]]
+    #st.dataframe(data[1])
+    #st.write(x_test.shape)
+
+    if len(x_train.columns) != len(x_test.columns):
+        st.info('Training and testing datasets have a different number of columns, cannot perform classification.')
+        return None
+
+    clf = MLPClassifier(random_state=1, max_iter=300).fit(x_train, y_train)
+    pred = clf.predict(x_test)
+    #x_test[data[0].columns[-1]] = pred
+    x_test[data[0].columns[-1]] = pred  # append predictions under the target column name
+    #st.dataframe(x_test)
+    return x_test
algos/classification/svmclassifier.py
ADDED
@@ -0,0 +1,30 @@
+import streamlit as st
+import numpy as np
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+from types import NoneType
+
+def process(data):
+    if type(data[0]) == NoneType or type(data[1]) == NoneType: # if either training or testing dataset is still missing
+        st.info('Please Upload Data')
+        return None
+    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+        st.info('Please Upload Numerical Data.')
+        return None
+    x_train = data[0].iloc[:,:-1]  # features: every column but the last
+    y_train = data[0].iloc[:,-1]   # target: last column of the training set
+    #st.write(x_train.shape)
+    x_test = data[1].iloc[:,:x_train.shape[1]]
+    #st.dataframe(data[1])
+    #st.write(x_test.shape)
+
+    if len(x_train.columns) != len(x_test.columns):
+        st.info('Training and testing datasets have a different number of columns, cannot perform classification.')
+        return None
+
+    clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))  # scale features, then fit the SVM
+    clf.fit(x_train, y_train)
+    pred = clf.predict(x_test)
+    x_test[data[0].columns[-1]] = pred
+    return x_test
algos/clustering/dbscan.py
ADDED
@@ -0,0 +1,26 @@
+import pandas as pd
+import streamlit as st
+from sklearn.cluster import DBSCAN
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+
+
+def process(data):
+
+    if 'object' in list(data[0].dtypes):
+        st.info('This Algorithm can only process numerical data')
+        return None
+
+    scaler = StandardScaler()
+    df = data[0].copy()
+
+    for c in data[0].columns:
+        df[c] = scaler.fit_transform(data[0][[c]])  # standardize each column
+
+    max_distance = st.slider("""Maximum distance between two samples for one to be considered
+        as in the neighborhood of the other:""",0.01,5.0)
+    dbscan = DBSCAN(eps=max_distance)
+    res = dbscan.fit_predict(df)
+    df = data[0]
+    df['cluster'] = res  # label the original (unscaled) data with cluster ids
+    return df
algos/clustering/kmeans.py
ADDED
@@ -0,0 +1,24 @@
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+import streamlit as st
+from sklearn.cluster import KMeans
+
+
+
+def process(data):
+
+    if 'object' in list(data[0].dtypes):
+        st.info('This Algorithm can only process numerical data')
+        return None
+
+    scaler = StandardScaler()
+    df = data[0].copy()
+
+    for c in data[0].columns:
+        df[c] = scaler.fit_transform(data[0][[c]])  # standardize each column
+    k = st.slider('Number of Clusters :',2,9)
+    kmeans = KMeans(n_clusters=k)
+    res = kmeans.fit_predict(df)
+    df = data[0]
+    df['cluster'] = res  # label the original (unscaled) data with cluster ids
+    return df
algos/clustering/kproto.py
ADDED
@@ -0,0 +1,44 @@
+from sklearn.preprocessing import StandardScaler
+from kmodes.kprototypes import KPrototypes
+from kmodes.kprototypes import euclidean_dissim
+import streamlit as st
+import algos.clustering.kmeans
+
+def process(data):
+
+
+    """Process K-Prototypes"""
+    df = data[0]
+    if 'object' not in list(df.dtypes):
+        return algos.clustering.kmeans.process(data)  # purely numerical data: fall back to K-Means
+
+    k = st.slider('Number of Clusters :',2,9)
+
+    numerical_columns = df.select_dtypes('number').columns
+    categorical_columns = df.select_dtypes('object').columns
+    categorical_indexes = []
+
+    # Scaling
+    scaler = StandardScaler()
+    for c in categorical_columns:
+        categorical_indexes.append(df.columns.get_loc(c))
+    if len(numerical_columns) == 0 or len(categorical_columns) == 0:
+        return
+    # create a copy of our data to be scaled
+    df_scale = df.copy()
+    # standard scale numerical features
+    for c in numerical_columns:
+        df_scale[c] = scaler.fit_transform(df[[c]])
+
+    # Process Data
+    kproto = KPrototypes(n_clusters=k,
+                         num_dissim=euclidean_dissim,
+                         random_state=0)
+
+    kproto.fit_predict(df_scale, categorical=categorical_indexes)
+
+    # add clusters to dataframe
+    df = data[0]
+    df["cluster"] = kproto.labels_
+
+    return df
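For reference, a standalone sketch of the K-Prototypes call above, outside Streamlit. The tiny mixed-type frame is invented for illustration, and this assumes the kmodes package pinned in requirements.txt accepts a DataFrame directly:

```python
import pandas as pd
from kmodes.kprototypes import KPrototypes

# Hypothetical mixed-type frame: one numerical and one categorical column.
df = pd.DataFrame({
    'income': [12.0, 15.5, 90.0, 88.2],
    'city':   ['Paris', 'Lyon', 'Paris', 'Nice'],
})

# K-Prototypes is told which columns are categorical by positional index,
# exactly as kproto.py computes categorical_indexes above.
cat_idx = [df.columns.get_loc(c) for c in df.select_dtypes('object').columns]

kproto = KPrototypes(n_clusters=2, random_state=0)
labels = kproto.fit_predict(df, categorical=cat_idx)
print(labels)  # one cluster id per row
```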
algos/others/others_page.py
ADDED
@@ -0,0 +1,4 @@
+import streamlit as st
+
+def render():
+    st.title("Other Algorithms")
algos/regression/elasticnet.py
ADDED
@@ -0,0 +1,43 @@
+from sklearn.linear_model import ElasticNet
+import streamlit as st
+from types import NoneType
+
+def process(data):
+    if len(data) == 0:
+        st.info('Please Upload Data')
+        return None
+    if type(data[0]) == NoneType or type(data[1]) == NoneType: # if either training or testing dataset is still missing
+        st.info('Please Upload Data')
+        return None
+    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+        st.info('Please Upload Numerical Data.')
+        return None
+    x_train = data[0].iloc[:,:-1]  # features: every column but the last
+    y_train = data[0].iloc[:,-1]   # target: last column of the training set
+    #st.write(x_train.shape)
+    x_test = data[1].iloc[:,:x_train.shape[1]]
+    #st.dataframe(data[1])
+    #st.write(x_test.shape)
+
+    if len(x_train.columns) != len(x_test.columns):
+        st.info('Training and testing datasets have a different number of columns, cannot perform regression.')
+        return None
+
+    clf = ElasticNet().fit(x_train, y_train)
+    pred = clf.predict(x_test)
+
+
+    cols = x_train.columns
+
+    #st.write(clf.coef_)
+
+    st.latex(f" {data[0].columns[-1]} = ")
+    coeffs = ['{:.4f}'.format(float(c)) for c in clf.coef_]
+    #st.write(coeffs)
+    eq = ' + '.join([coef + ' × ' + str(col) for coef, col in zip(coeffs, cols)])
+    st.markdown(f" $$ {clf.intercept_} + {eq} $$")
+
+    st.latex(f" R^2 = {clf.score(x_train, y_train)} ")
+
+    x_test[data[0].columns[-1]] = pred
+    return x_test
algos/regression/linR.py
ADDED
@@ -0,0 +1,41 @@
+from sklearn.linear_model import LinearRegression
+import streamlit as st
+from types import NoneType
+
+def process(data):
+    if len(data) == 0:
+        st.info('Please Upload Data')
+        return None
+    if type(data[0]) == NoneType or type(data[1]) == NoneType: # if either training or testing dataset is still missing
+        st.info('Please Upload Data')
+        return None
+    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+        st.info('Please Upload Numerical Data.')
+        return None
+    x_train = data[0].iloc[:,:-1]  # features: every column but the last
+    y_train = data[0].iloc[:,-1]   # target: last column of the training set
+    #st.write(x_train.shape)
+    x_test = data[1].iloc[:,:x_train.shape[1]]
+    #st.dataframe(data[1])
+    #st.write(x_test.shape)
+
+    if len(x_train.columns) != len(x_test.columns):
+        st.info('Training and testing datasets have a different number of columns, cannot perform regression.')
+        return None
+
+    reg = LinearRegression().fit(x_train, y_train)
+
+
+    cols = x_train.columns
+    #st.write(list(zip(reg.coef_,cols)))
+    st.latex(f" {data[0].columns[-1]} = ")
+    coeffs = ['{:.4f}'.format(float(c)) for c in reg.coef_]
+
+    eq = ' + '.join([coef + ' × ' + str(col) for coef, col in zip(coeffs, cols)])
+    st.markdown(f" $$ {reg.intercept_} + {eq} $$")
+
+    st.latex(f" R^2 = {reg.score(x_train, y_train)} ")
+
+    pred = reg.predict(x_test)
+    x_test[data[0].columns[-1]] = pred
+    return x_test
algos/regression/ridge.py
ADDED
@@ -0,0 +1,37 @@
+from sklearn.linear_model import Ridge
+import streamlit as st
+from types import NoneType
+
+def process(data):
+    if len(data) == 0:
+        st.info('Please Upload Data')
+        return None
+    if type(data[0]) == NoneType or type(data[1]) == NoneType: # if either training or testing dataset is still missing
+        st.info('Please Upload Data')
+        return None
+    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+        st.info('Please Upload Numerical Data.')
+        return None
+    x_train = data[0].iloc[:,:-1]  # features: every column but the last
+    y_train = data[0].iloc[:,-1]   # target: last column of the training set
+    #st.write(x_train.shape)
+    x_test = data[1].iloc[:,:x_train.shape[1]]
+    #st.dataframe(data[1])
+    #st.write(x_test.shape)
+
+    if len(x_train.columns) != len(x_test.columns):
+        st.info('Training and testing datasets have a different number of columns, cannot perform regression.')
+        return None
+
+    clf = Ridge(alpha=1.0).fit(x_train, y_train)
+    pred = clf.predict(x_test)
+    #st.write(clf.coef_)
+
+    cols = x_train.columns
+    st.latex(f" {data[0].columns[-1]} = ")
+    coeffs = ['{:.4f}'.format(float(c)) for c in clf.coef_]
+    eq = ' + '.join([coef + ' × ' + str(col) for coef, col in zip(coeffs, cols)])
+    st.markdown(f" $$ {clf.intercept_} + {eq} $$")
+    st.latex(f" R^2 = {clf.score(x_train, y_train)} ")
+    x_test[data[0].columns[-1]] = pred
+    return x_test
analysis/exploration.py
ADDED
@@ -0,0 +1,20 @@
+import streamlit as st
+from utilities.template_helpers import upload_data
+import pandas as pd
+from types import NoneType
+from pandas_profiling import ProfileReport
+from streamlit_pandas_profiling import st_profile_report
+import sys
+
+def render():
+    st.title("DATA EXPLORATION")
+    col1, col2 = st.columns([2,5])
+    df = None
+    with col1.container():
+        df = upload_data()
+        if type(df) is NoneType:
+            return
+        st.dataframe(df.describe())
+    with col2.container():
+        pr = ProfileReport(df)
+        st_profile_report(pr)
analysis/preprocessing.py
ADDED
@@ -0,0 +1,113 @@
+import streamlit as st
+from utilities.template_helpers import upload_data
+from types import NoneType
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+
+
+def render():
+    st.title("PREPROCESSING")
+    # dropna
+    # fillna
+    # select columns
+    # scaling
+
+    col1, col2, col3 = st.columns([1,1,1])
+
+    df = None
+    with col1.container():
+        df = upload_data()
+        if type(df) is NoneType:
+            return
+        if df.shape == (0,0):
+            return
+        info = pd.DataFrame()
+        info['dtypes'] = pd.DataFrame(df.dtypes)
+        info['null'] = df.isna().sum()
+
+        tab1, tab2 = st.tabs(['Dataframe','Info'])
+        with tab1:
+            st.dataframe(df, use_container_width=True, height=300)
+        with tab2:
+            st.dataframe(info,use_container_width=True,height=300)
+
+    with col2.container():
+        ### DROP NA ###
+        st.write('\n\n')
+        st.markdown('#### Drop Null Values')
+        st.write('Drop any row containing null values')
+        drop_null = st.checkbox('Drop')
+        if drop_null:
+            df.dropna(inplace=True)
+
+        ### FILL NA ####
+        st.write("\n\n")
+        st.markdown('#### Fill Null Values')
+        st.write("""Replace null values with the mean of the column for numerical variables,
+            and the mode for categorical variables""")
+        fill_null = st.checkbox('Fill')
+        if fill_null:
+            for col in df.columns:
+                val = 0
+                if df[col].dtype == 'object':
+                    val = df[col].mode()[0]  # mode() returns a Series; take its first value
+                else:
+                    val = df[col].mean()
+                df[col] = df[col].fillna(val)  # fillna returns a copy; assign it back
+
+        ### SCALING ###
+        st.write('\n\n')
+        st.markdown("#### Scaling")
+        st.write("Standardize numerical features by removing the mean and scaling to unit variance.")
+        scale = st.checkbox('Scale')
+        if scale:
+            numerical_columns = df.select_dtypes('number').columns
+            categorical_columns = df.select_dtypes('object').columns
+            categorical_indexes = []
+
+            # Scaling
+            scaler = StandardScaler()
+            for c in categorical_columns:
+                categorical_indexes.append(df.columns.get_loc(c))
+            # create a copy of our data to be scaled
+            df_scale = df.copy()
+            # standard scale numerical features
+            for c in numerical_columns:
+                df_scale[c] = scaler.fit_transform(df[[c]])
+            df = df_scale
+
+
+    with col3.container():
+        ### SELECT COLUMNS
+        st.write("\n\n")
+        st.markdown("#### Choose columns")
+        cols = st.multiselect('Select columns to use',options=list(df.columns),default=list(df.columns))
+        #select_cols = st.button('Use selected columns')
+        #if select_cols:
+        df = df[cols]
+
+        st.write("\n\n")
+        st.markdown("#### Encode Categorical Values")
+        enc = st.checkbox('Encode')
+        if enc:  # map each distinct category to an integer code, column by column
+            df.loc[:,df.dtypes == 'object'] = df.loc[:,df.dtypes == 'object'].apply(
+                lambda x: x.replace(x.unique(),list(range(1,1+len(x.unique())))))
+
+        st.write('\n\n')
+        st.markdown("#### Download Preprocessed data")
+        st.download_button("Download Results",
+            df.to_csv(index=False),
+            "preprocessed.csv",
+            "text/csv",
+            key="download-csv")
+        #st.dataframe(df)
+
+
+
+
+#def res_session():
+#    st.session_state['drop_na'] = False
+#    st.session_state['fill_na'] = False
+#    st.session_state['scale'] = False
+#    st.session_state['']
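The Encode checkbox above relies on a one-liner that maps each distinct category to an integer code. A standalone sketch of that mapping on an invented toy column, for clarity:

```python
import pandas as pd

s = pd.Series(['red', 'green', 'red', 'blue'])

# Replace each unique value with an integer code starting at 1, in order of
# first appearance -- the same trick the Encode checkbox applies per column.
encoded = s.replace(s.unique(), list(range(1, 1 + len(s.unique()))))
print(encoded.tolist())  # [1, 2, 1, 3]
```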
app.py
ADDED
@@ -0,0 +1,49 @@
+import streamlit as st
+from utilities.standard_template import Page, get_info
+from utilities.land import land_page
+import analysis.preprocessing
+import analysis.exploration
+import warnings
+
+import algos.others.others_page
+
+warnings.filterwarnings("ignore")
+
+# PAGE CONFIGURATION, CHANGE NAME AND ICON
+
+st.set_page_config(layout="wide",page_title='AIViz',page_icon='carott.png')
+hide_streamlit_style = """
+            <style>
+            #MainMenu {visibility: hidden;}
+            footer {visibility: hidden;}
+            </style>
+            """
+#st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+
+with st.sidebar:
+    #st.image('carott.png')
+    choice = st.selectbox('Choose Algorithm Category',[
+        " --- Choose --- ",
+        "Clustering",
+        "Classification",
+        "Regression",
+        "Data Exploration",
+        "Data Preprocessing",
+        #"Others"
+    ])
+    get_info(choice)
+
+if choice in ['Clustering', 'Classification', 'Regression']:
+    Page(choice).render()
+
+elif choice == 'Data Preprocessing':
+    analysis.preprocessing.render()
+
+elif choice == 'Data Exploration':
+    analysis.exploration.render()
+
+elif choice == 'Others':
+    algos.others.others_page.render()
+
+else:
+    land_page()
carott.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+numpy
+pandas
+kmodes
+scikit-learn
+streamlit
+extra_streamlit_components
+plotly
+prince
+pandas-profiling
+streamlit-pandas-profiling
utilities/components.py
ADDED
@@ -0,0 +1,118 @@
+import streamlit as st
+from utilities.template_helpers import upload_data
+from types import NoneType
+import extra_streamlit_components as stx
+
+import prince
+import plotly.express as px
+import pandas as pd
+
+import algos.clustering.kmeans
+import algos.clustering.dbscan
+import algos.clustering.kproto
+
+import algos.classification.nnclassifier
+import algos.classification.logistic
+import algos.classification.svmclassifier
+
+import algos.regression.linR
+import algos.regression.ridge
+import algos.regression.elasticnet
+
+
+
+def get_data(category, algo_name=None):
+    if category in ['Classification','Regression']:
+        train = upload_data('Training Data')
+        test = upload_data('Testing Data')
+        return train, test
+    else:
+        df = upload_data()
+        if type(df) != NoneType:
+            return (df,)
+
+
+def choose_algo(category):
+    if category == 'Clustering':
+        algo = stx.tab_bar(data=[
+            stx.TabBarItemData(id='K-Means',title='K-Means',description='Partitional Clustering Algorithm'),
+            stx.TabBarItemData(id='DBSCAN',title='DBSCAN',description='Density Based Clustering Algorithm'),
+            stx.TabBarItemData(id='K-Prototype',title='K-Prototype',description='Partitional over Mixed Data')]
+        )
+        if algo == 'K-Means':
+            return algos.clustering.kmeans.process
+        if algo == 'DBSCAN':
+            return algos.clustering.dbscan.process
+        if algo == 'K-Prototype':
+            return algos.clustering.kproto.process
+    elif category == 'Classification':
+        algo = stx.tab_bar(data=[
+            stx.TabBarItemData(id='NN',title='Neural Network',description='Multi-Layer Perceptron classifier'),
+            stx.TabBarItemData(id='SVM',title='Support Vector Classifier',
+                description='Classification using Support Vector Machines'),
+            stx.TabBarItemData(id='logR',title='Logistic Regression',description='Logistic Regression Classifier')]
+        )
+        if algo == 'NN':
+            return algos.classification.nnclassifier.process
+        if algo == 'SVM':
+            return algos.classification.svmclassifier.process
+        if algo == 'logR':
+            return algos.classification.logistic.process
+    elif category == 'Regression':
+        algo = stx.tab_bar(data=[
+            stx.TabBarItemData(id='linR',title='Linear Regression',description='Linear Regression'),
+            stx.TabBarItemData(id='ridge',title='Ridge',
+                description='Ridge Regression'),
+            stx.TabBarItemData(id='elastic',title='Elastic Net Regression',description='Elastic Net Regression')]
+        )
+        if algo == 'linR':
+            return algos.regression.linR.process
+        if algo == 'ridge':
+            return algos.regression.ridge.process
+        if algo == 'elastic':
+            return algos.regression.elasticnet.process
+
+
+def get_plot(df, title):
+
+    if title == 'Regression':
+        return None # Do not plot regression, display its coefficients
+
+    reduce_algo = None
+    pca = None
+
+    # Better title for the graph
+    viz_thing = 'Clusters'
+    if title == 'Classification':
+        viz_thing = 'Classes'
+
+    # name of column to represent as color on the graph (target class)
+    if type(df) == NoneType:
+        return None
+    if len(df) == 0:
+        return None
+    target_class = df.columns[-1]
+
+    if df.shape == (0,0):
+        return None
+
+    if 'object' in list(df.dtypes):
+        reduce_algo = 'FAMD'
+        pca = prince.FAMD(n_components=3)
+    else:
+        reduce_algo = 'Principal Component Analysis'
+        pca = prince.PCA(n_components=3)
+    reduced = pca.fit(df.iloc[:,:-1]).row_coordinates(df.iloc[:,:-1])
+    reduced.columns = ['X','Y','Z']
+    reduced[target_class] = df[target_class].astype(str)
+    # Explained inertia of each axis
+    labs = {
+        "X" : f"Component 0 - ({round(100*pca.explained_inertia_[0],2)}% inertia)",
+        "Y" : f"Component 1 - ({round(100*pca.explained_inertia_[1],2)}% inertia)",
+        "Z" : f"Component 2 - ({round(100*pca.explained_inertia_[2],2)}% inertia)",
+    }
+    tot_inertia = f"{round(100*pca.explained_inertia_.sum(),2)}"
+    st.write(f'{reduce_algo} Visualization of {viz_thing} ({tot_inertia}%) :')
+    fig = px.scatter_3d(reduced,x='X',y='Y',z='Z',color=target_class,labels=labs)
+    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0),showlegend=False,height=300)
+    return fig
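get_plot reduces the feature columns to three components before handing X/Y/Z coordinates to plotly. A standalone sketch of that projection step, using the same prince fit/row_coordinates calls as above (the toy frame is invented; the last column plays the role of the cluster/class label):

```python
import pandas as pd
import prince

# Hypothetical all-numeric frame; 'cluster' stands in for the label column.
df = pd.DataFrame({
    'a': [1.0, 2.0, 3.0, 4.0, 5.0],
    'b': [4.0, 3.0, 2.0, 1.0, 0.0],
    'c': [1.5, 2.5, 3.5, 4.5, 5.5],
    'd': [0.5, 0.1, 0.9, 0.3, 0.7],
    'cluster': [0, 0, 1, 1, 0],
})

# Project the features (all but the last column) onto three principal
# components, as get_plot does before building the 3D scatter.
pca = prince.PCA(n_components=3)
reduced = pca.fit(df.iloc[:, :-1]).row_coordinates(df.iloc[:, :-1])
reduced.columns = ['X', 'Y', 'Z']
print(reduced)  # one (X, Y, Z) coordinate per row
```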
utilities/land.py
ADDED
@@ -0,0 +1,35 @@
+import streamlit as st
+
+
+
+def land_page():
+    _,center,_ = st.columns([2,3,2])
+    center.markdown("<h1 style='text-align: center;'>AIViz</h1>", unsafe_allow_html=True)
+    center.write("""Machine Learning. For everyone. Now. AIViz is a platform built to let everyone perform Machine
+        Learning easily on their own data.""")
+
+    center.image('carott.png')
+
+    center.markdown("<h3 style='text-align: center;'>Use your own data</h3>", unsafe_allow_html=True)
+
+    center.write("You can use your own data with AIViz. All it takes is a click.")
+
+    center.markdown("<h3 style='text-align: center;'>Understand your Data</h3>", unsafe_allow_html=True)
+
+    center.write("""AIViz provides a Data Exploration tool that lets you explore all your variables. You can
+        easily visualize and understand univariate and bivariate behavior of your data.""")
+
+    center.markdown("<h3 style='text-align: center;'>Preprocessing</h3>", unsafe_allow_html=True)
+
+    center.write("""You can prepare your data for Machine Learning in just a few clicks. You can decide how
+        to handle missing values, choose which columns to use, scale your data...""")
+
+    center.markdown("<h3 style='text-align: center;'>Machine Learning</h3>", unsafe_allow_html=True)
+
+    st.latex("""The \ smartest \ carott \ of \ the \ World \\newline \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
+        \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
+        \ \ \ \ \ \ \ \ \ \ \ \ \ - \ us.""")
+
+    center.write("""The core of AIViz is Machine Learning. Now that you have uploaded and preprocessed
+        your data, you can apply Artificial Intelligence algorithms to it. We provide several
+        different algorithms, for Clustering, Classification or Regression.""")
utilities/standard_template.py
ADDED
@@ -0,0 +1,58 @@
+import streamlit as st
+from utilities.components import get_data, choose_algo, get_plot
+from types import NoneType
+import pandas as pd
+
+
+def get_info(category):
+    infos = {
+        " --- Choose --- ":'We provide several different types of algorithms, such as Clustering or Classification',
+        "Clustering":'Unsupervised, creates clusters of similar individuals',
+        "Classification":"""Supervised, assigns individuals to a class using
+            training data. The last column will be used as the target class.""",
+        "Regression":"Supervised, predicts a numerical value for a column, using training data",
+        "Data Exploration":"Univariate and bivariate data analysis",
+        "Data Preprocessing":"Prepare data for Machine Learning",
+        "Others":'Other algorithms, such as linear regression'
+    }
+    st.info(infos[category])
+
+class Page:
+    def __init__(self, title) -> None:
+        self.title = title
+        self.data = None
+        self.algo = None
+        self.plot = None
+        self.results = None
+
+    def render(self):
+        st.title(self.title.upper())
+        col1, col2 = st.columns([2,5])
+
+        ##### CHOOSE DATA #####
+        with col1.container():
+            data = get_data(self.title)
+            if type(data) == tuple:
+                if self.title == 'Clustering' and type(data[0]) is not NoneType:
+                    st.dataframe(data[0], use_container_width=True,height=280)
+                self.data = data
+
+
+        with col2.container():
+            ##### CHOOSE ALGORITHM #####
+            self.algo = choose_algo(self.title)
+            if self.algo is not None and self.data is not None:
+                self.results = pd.DataFrame(self.algo(self.data))
+                self.plot = get_plot(self.results, self.title)
+
+            ##### PLOT RESULTS #####
+            if self.plot is not None:
+                st.plotly_chart(self.plot)
+
+        ##### DOWNLOAD RESULTS #####
+        if self.results is not None:
+            col1.download_button("Download Results",
+                self.results.to_csv(index=False),
+                "results.csv",
+                "text/csv",
+                key="download-csv")
utilities/template_helpers.py
ADDED
@@ -0,0 +1,9 @@
+import pandas as pd
+import streamlit as st
+
+
+def upload_data(descr='Upload Data'):
+    up = st.file_uploader(descr)
+    if up:
+        df = pd.read_csv(up).dropna()
+        return df