import os

import streamlit as st

# EDA Pkgs
import pandas as pd
import numpy as np

# Viz Pkgs
import matplotlib
matplotlib.use('Agg')  # must be selected before pyplot is first imported
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

regressor = LogisticRegression()

# NOTE(review): this template contains no HTML tags even though it is rendered
# with unsafe_allow_html=True -- confirm whether markup was lost upstream.
_TITLE_TEMPLATE = "\n\nRoom Occupancy Predictor 💡\n\n"


def _select_file(folder_path='./dataset'):
    """Let the user pick one file from *folder_path* through a select box.

    Returns the chosen file's path joined onto *folder_path*, or ``None``
    when the folder cannot be listed or holds no regular files (in which
    case no select box is drawn).
    """
    try:
        entries = os.listdir(folder_path)
    except OSError:
        return None
    # Sub-directories cannot be read as a dataset; sorting keeps the option
    # order stable between reruns.
    filenames = sorted(
        name for name in entries
        if os.path.isfile(os.path.join(folder_path, name))
    )
    if not filenames:
        return None
    selected_filename = st.selectbox("Select A file", filenames)
    return os.path.join(folder_path, selected_filename)


def _load_selected_dataset():
    """Ask for a dataset file and load it with ``pd.read_csv``.

    Returns the loaded DataFrame, or ``None`` after showing an error message
    when no file is available or the file cannot be parsed.
    """
    filename = _select_file()
    if filename is None:
        st.error("No dataset files found in ./dataset")
        return None
    st.info(f"You Selected {filename}")
    try:
        return pd.read_csv(filename)
    except (pd.errors.ParserError, pd.errors.EmptyDataError,
            UnicodeDecodeError) as err:
        st.error(f"Could not read {filename} as CSV: {err}")
        return None


def _show_figure(fig):
    """Render *fig* in the app, then close it so reruns do not pile up figures."""
    st.pyplot(fig)
    plt.close(fig)


def _show_overview(df):
    """Draw the dataset inspection widgets (table, columns, shape, summary)."""
    # Show Dataset
    if st.checkbox("Show Dataset"):
        st.write(df.astype(str))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
        else:
            st.write(df.shape)

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect("Select", df.columns.tolist())
        st.dataframe(df[selected_columns])

    # Show Values (the last column is treated as the target/class)
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Datatypes
    if st.button("Data Types"):
        st.text(df.dtypes)

    # Show Summary
    if st.checkbox("Summary"):
        st.write(df.describe().T)


def _show_visualizations(df):
    """Draw the plotting widgets; every matplotlib plot gets its own figure."""
    st.subheader("Data Visualization")

    # Correlation heatmap: restricted to numeric columns because correlating
    # object columns raises on recent pandas versions.
    if st.checkbox("Correlation Plot[Seaborn]"):
        numeric_df = df.select_dtypes(include=np.number)
        if numeric_df.shape[1] == 0:
            st.warning("No numeric columns available for a correlation plot")
        else:
            fig, ax = plt.subplots()
            sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
            _show_figure(fig)

    # Pie Chart of the last column's value counts
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
            _show_figure(fig)

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy", all_columns_names)
        selected_columns_names = st.multiselect("Select Columns", all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            fig, ax = plt.subplots()
            vc_plot.plot(kind="bar", ax=ax)
            _show_figure(fig)

    # Customizable Plot
    st.subheader("Customizable Plot")
    type_of_plot = st.selectbox(
        "Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"]
    )
    selected_columns_names = st.multiselect(
        "Select Columns To Plot", df.columns.tolist()
    )
    if st.button("Generate Plot"):
        if not selected_columns_names:
            # Plotting an empty column selection has nothing to draw.
            st.warning("Select at least one column to plot")
        else:
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]
            # Streamlit's native charts cover area/bar/line; the remaining
            # kinds go through the pandas/matplotlib plotting API.
            native_charts = {
                'area': st.area_chart,
                'bar': st.bar_chart,
                'line': st.line_chart,
            }
            if type_of_plot in native_charts:
                native_charts[type_of_plot](cust_data)
            else:
                fig, ax = plt.subplots()
                cust_data.plot(kind=type_of_plot, ax=ax)
                _show_figure(fig)


def _impute_and_encode(df):
    """Return a copy of *df* with missing values filled and text columns encoded.

    Object columns are imputed with their most frequent value and then
    label-encoded to integers; all other columns are imputed with the mean.
    """
    data = df.copy()
    is_object = data.dtypes == 'object'

    # Each imputer is skipped when it would receive zero columns.
    if is_object.any():
        cat_imp = SimpleImputer(strategy="most_frequent")
        data.loc[:, is_object] = cat_imp.fit_transform(data.loc[:, is_object])
    if (~is_object).any():
        imp = SimpleImputer(missing_values=np.nan, strategy="mean")
        data.loc[:, ~is_object] = imp.fit_transform(data.loc[:, ~is_object])

    # Label-encode categoricals. The previous one-hot step built a frame that
    # was never assigned, so it had no effect and is omitted.
    for column in data.columns[is_object]:
        data[column] = LabelEncoder().fit_transform(data[column])
    return data


def _fit_and_score(alg, X, y):
    """Split, scale, fit *alg* and return ``(train_accuracy, test_accuracy)``.

    The scaler is fit on the training split only, so the held-out rows do not
    influence the feature ranges used for training.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    alg.fit(X_train, y_train)
    accuracy_train = accuracy_score(y_train, alg.predict(X_train))
    accuracy_test = accuracy_score(y_test, alg.predict(X_test))
    return accuracy_train, accuracy_test


def _train_and_evaluate(df):
    """Draw the model controls, train the chosen classifier and report accuracy."""
    st.subheader("Model, Deployment, and Evaluation")
    data = _impute_and_encode(df)

    # An empty selection keeps every column, matching the earlier behaviour
    # where this widget's value was ignored.
    features = st.multiselect(
        'select features and target variable', data.columns.tolist()
    )
    candidate_columns = features if features else data.columns.tolist()

    chosen_target = st.sidebar.selectbox(
        "Please choose target column", candidate_columns
    )
    # Only classification is offered; the widget is kept for the sidebar
    # layout and its value needs no branching.
    st.sidebar.selectbox("Algorithm type", ("Classification",))
    chosen_classifier = st.sidebar.selectbox(
        "Please choose a classifier", ('Logistic Regression', 'Naive Bayes')
    )
    if chosen_classifier == 'Logistic Regression':
        max_iter = st.sidebar.slider('max iterations', 1, 100, 10)
        # The slider value is now actually passed to the solver.
        alg = LogisticRegression(max_iter=max_iter)
    else:
        alg = GaussianNB()

    feature_columns = [col for col in candidate_columns if col != chosen_target]
    if not feature_columns:
        st.warning("Select at least one feature column besides the target")
        return

    try:
        accuracy_train, accuracy_test = _fit_and_score(
            alg, data[feature_columns], data[chosen_target]
        )
    except ValueError as err:
        # scikit-learn reports unusable inputs (e.g. a target it cannot
        # treat as class labels, or too few rows to split) as ValueError.
        st.error(f"Could not train the model: {err}")
        return

    st.write(
        f"### Accuracy Train: {round(accuracy_train, 3)}"
        f" -- Accuracy Test: {round(accuracy_test, 3)}"
    )


def main():
    """ Common ML Dataset Explorer """
    st.markdown(_TITLE_TEMPLATE, unsafe_allow_html=True)

    # Read Data
    df = _load_selected_dataset()
    if df is not None:
        _show_overview(df)
        _show_visualizations(df)
        _train_and_evaluate(df)

    if st.button("Thanks"):
        st.balloons()

    st.sidebar.header("About App")
    st.sidebar.info("A Simple ML App for predicting Room Occupancy")
    st.sidebar.header("Developer")
    st.sidebar.info("Nasim Obeid")
    st.sidebar.text("Built with Streamlit")


if __name__ == '__main__':
    main()