File size: 4,433 Bytes
a3171a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
from utilities.template_helpers import upload_data
from types import NoneType
import extra_streamlit_components as stx

import prince
import plotly.express as px
import pandas as pd

import algos.clustering.kmeans
import algos.clustering.dbscan
import algos.clustering.kproto

import algos.classification.nnclassifier
import algos.classification.logistic
import algos.classification.svmclassifier

import algos.regression.linR
import algos.regression.ridge
import algos.regression.elasticnet

from types import NoneType

def get_data(category, algo_name=None):
    """Ask the user for the dataset(s) needed by the selected task category.

    Args:
        category: Task category name. 'Classification' and 'Regression'
            request two uploads (training and testing data); any other
            value requests a single upload.
        algo_name: Currently unused; kept so existing callers keep working.

    Returns:
        ``(train, test)`` for 'Classification' / 'Regression', where each
        element is whatever ``upload_data`` returned (no None check is done
        on them). For every other category, a one-element tuple ``(df,)``
        when ``upload_data`` returned something, or None when it returned
        None.
    """
    if category in ('Classification', 'Regression'):
        train = upload_data('Training Data')
        test = upload_data('Testing Data')
        return train, test

    df = upload_data()
    # Identity check rather than comparing type(df) with types.NoneType,
    # which only exists on Python 3.10+.
    if df is None:
        return None
    return (df,)


def choose_algo(category):
    """Render the algorithm tab bar for ``category`` and return the chosen entry point.

    Args:
        category: One of 'Clustering', 'Classification' or 'Regression'.

    Returns:
        The ``process`` attribute of the algorithm module whose tab id
        matches the value returned by ``stx.tab_bar``, or None when the
        category is not one of the three above or the returned value does
        not match any tab id.
    """
    # category -> list of (tab id, tab title, tab description, algorithm module)
    catalog = {
        'Clustering': [
            ('K-Means', 'K-Means', 'Partitional Clustering Algorithm',
             algos.clustering.kmeans),
            ('DBSCAN', 'DBSCAN', 'Density Based Clustering Algorithm',
             algos.clustering.dbscan),
            ('K-Prototype', 'K-Prototype', 'Partitional over Mixed Data',
             algos.clustering.kproto),
        ],
        'Classification': [
            ('NN', 'Neural Network', 'Multi-Layer Perceptron classifier',
             algos.classification.nnclassifier),
            ('SVM', 'Support Vector Classifier',
             'Classification using Support Vector Machines',
             algos.classification.svmclassifier),
            ('logR', 'Logistic Regression', 'Logistic Regression Classifier',
             algos.classification.logistic),
        ],
        'Regression': [
            ('linR', 'Linear Regression', 'Linear Regression',
             algos.regression.linR),
            ('ridge', 'Ridge', 'Ridge Regression',
             algos.regression.ridge),
            ('elastic', 'Elastic Net Regression', 'Elastic Net Regression',
             algos.regression.elasticnet),
        ],
    }

    entries = catalog.get(category)
    if entries is None:
        return None

    # Only the tab bar of the requested category is rendered.
    selected = stx.tab_bar(data=[
        stx.TabBarItemData(id=tab_id, title=tab_title, description=tab_description)
        for tab_id, tab_title, tab_description, _ in entries
    ])

    for tab_id, _, _, module in entries:
        if selected == tab_id:
            # Resolve ``process`` only for the chosen module.
            return module.process
    return None


def get_plot(df, title):
    """Build a 3D scatter plot of ``df`` projected onto three components.

    The last column of ``df`` is used as the colour (target) column; all
    other columns are the features that get reduced. A one-line caption with
    the reduction method and total inertia is written via ``st.write``.

    Args:
        df: DataFrame whose last column holds the cluster / class label,
            or None.
        title: Task category. 'Regression' is never plotted;
            'Classification' captions the points as "Classes", anything
            else as "Clusters".

    Returns:
        A plotly figure, or None when ``title`` is 'Regression' or ``df`` is
        None or empty (no rows or no columns).
    """
    if title == 'Regression':
        return None  # Do not plot regression, display its coefficients

    # Nothing to plot without data. ``DataFrame.empty`` is true when either
    # axis has length 0, so the last-column lookup below cannot fail.
    if df is None or df.empty:
        return None

    # Better title for the graph
    viz_thing = 'Classes' if title == 'Classification' else 'Clusters'

    # name of column to represent as color on the graph (target class)
    target_class = df.columns[-1]
    features = df.iloc[:, :-1]

    # Choose the reducer from the feature columns only: the target column is
    # not part of the fit, so an object-typed label must not force FAMD onto
    # purely numeric features.
    if 'object' in list(features.dtypes):
        reduce_algo = 'FAMD'
        pca = prince.FAMD(n_components=3)
    else:
        reduce_algo = 'Principal Component Analysis'
        pca = prince.PCA(n_components=3)

    reduced = pca.fit(features).row_coordinates(features)
    reduced.columns = ['X', 'Y', 'Z']
    reduced[target_class] = df[target_class].astype(str)

    # Each axis' inertia
    inertia = pca.explained_inertia_
    labs = {
        axis: f"Component {idx} - ({round(100*inertia[idx],2)}% inertia)"
        for idx, axis in enumerate(('X', 'Y', 'Z'))
    }
    tot_inertia = f"{round(100*inertia.sum(),2)}"
    st.write(f'{reduce_algo} Visualization of {viz_thing} ({tot_inertia}%) :')

    fig = px.scatter_3d(reduced, x='X', y='Y', z='Z', color=target_class, labels=labs)
    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), showlegend=False, height=300)
    return fig