"""LazyML: a Streamlit front end for quick tabular machine-learning experiments.

The script is executed top to bottom on every Streamlit rerun.  It lets the
user upload a training CSV (and optionally a test CSV), pick and transform a
target column, clean and encode the features, split the data, tune the
hyper-parameters of the models listed in the project-local ``models`` module,
train them through ``evaluationer.evaluation`` and download predictions for
the test file.
"""

# import libraries
import math
import numbers

import joblib  # noqa: F401  (imported by the original script; kept in scope)
import numpy as np
import pandas as pd
import streamlit as st
# enable_iterative_imputer must be imported before IterativeImputer.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder

import auto_optimizer
import evaluationer
import models
import null_value_handling

st.set_page_config(
    page_title="LazyML App",
    page_icon="🧊",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://www.extremelycoolapp.com/help',
        'Report a bug': "https://www.extremelycoolapp.com/bug",
        'About': "# This is a header. This is an *extremely* cool app!",
    },
)

YES_NO = ["Yes", "No"]

# Forward target transformations: widget label -> (function, success message).
LOG_TRANSFORMS = {
    "Natural Log base(e)": (np.log, "Log base (e) Transformation Completed ✅"),
    "Log base 10": (np.log10, "Log base 10 Transformation Completed ✅"),
    "Log base (2)": (np.log2, "Log base 2 Transformation Completed ✅"),
}


def _confirm(label):
    """Render a Yes/No radio that defaults to "No"; return True for "Yes"."""
    return st.radio(label, YES_NO, index=1) == "Yes"


def _is_missing(value):
    """Return True for ``None`` and float NaN (values that must not be set)."""
    return value is None or (isinstance(value, float) and math.isnan(value))


def _is_boolean(value):
    """Return True when ``value`` is a Python or NumPy boolean."""
    return isinstance(value, (bool, np.bool_))


def _is_numeric_default(value):
    """Return True when a hyper-parameter default belongs in a number input.

    ``None`` is treated as numeric (rendered as an empty number input), which
    mirrors how the defaults were previously classified.
    """
    if _is_boolean(value):
        return False
    return value is None or isinstance(value, numbers.Real)


def _collect_hyperparameters(estimator, option):
    """Render one input widget per hyper-parameter and return the chosen values.

    Widgets are grouped as numeric, then text, then boolean parameters, each
    group in the order reported by ``estimator.get_params()`` so the layout is
    stable between reruns.  Parameters left empty (``None``/NaN) are omitted
    from the result so the estimator keeps its current value for them.

    Args:
        estimator: object exposing ``get_params()``.
        option: display name of the model, used to keep widget labels unique.

    Returns:
        dict mapping parameter name to the value selected in the UI.
    """
    defaults = estimator.get_params()
    chosen = {}

    for name, default in defaults.items():
        if _is_numeric_default(default):
            chosen[name] = st.number_input(
                f'input "{name}" value \n{option}', value=default
            )

    for name, default in defaults.items():
        if _is_numeric_default(default) or _is_boolean(default):
            continue
        text = st.text_input(f'input "{name}" value \n{option}', value=default)
        # Keep the original object when the text was not edited so that a
        # non-string default is not replaced by its string representation.
        chosen[name] = default if text == str(default) else text

    for name, default in defaults.items():
        if _is_boolean(default):
            st.write("default value to insert", default)
            chosen[name] = st.selectbox(
                f'input "{name}" value \n{option}',
                [False, True],
                index=int(bool(default)),
            )

    return {name: value for name, value in chosen.items() if not _is_missing(value)}


def _choose_null_methods(frame, columns, methods, kind, drop_unused):
    """Let the user assign a null-handling method to each column with nulls.

    Shows the percentage of nulls per affected column, then one multiselect
    per method; a column picked for one method is removed from the options of
    the following ones.

    Args:
        frame: training features.
        columns: columns of ``frame`` to inspect.
        methods: iterable of method names offered to the user.
        kind: "Numerical" or "Categorical", used in the widget label.
        drop_unused: when True, methods without any selected column are
            removed from the returned table.

    Returns:
        DataFrame with one column per method listing the chosen column names.
    """
    null_counts = frame[columns].isnull().sum()
    null_counts = null_counts[null_counts > 0]
    st.dataframe((null_counts / len(frame) * 100).round(2))

    remaining = list(null_counts.index)
    assignment = {}
    for nvh_method in methods:
        picked = st.multiselect(
            f'method:- "{nvh_method}" for {kind} columns', remaining
        )
        assignment[nvh_method] = picked
        remaining = [col for col in remaining if col not in picked]
        if not remaining:
            break

    table = pd.DataFrame(
        data=list(assignment.values()), index=list(assignment.keys())
    ).T
    if drop_unused:
        used = table.count()
        table = table[used[used > 0].index]
    return table


def _invert_target_transformation(prediction, applied_transformation, power_value):
    """Undo the target transformation on model predictions.

    Args:
        prediction: array of predictions on the transformed scale.
        applied_transformation: label of the transformation that was actually
            applied to the target, or ``None`` when none was applied.
        power_value: root used by the "Other" power transformation.

    Returns:
        Predictions mapped back to the original target scale.
    """
    if applied_transformation == "Natural Log base(e)":
        prediction = np.exp(prediction)
        st.write("Natural Log base(e) Inverse Transformation Completed ✅")
    elif applied_transformation == "Log base 10":
        prediction = np.power(10, prediction)
        st.write("Log base 10 Inverse Transformation Completed ✅")
    elif applied_transformation == "Log base (2)":
        prediction = np.power(2, prediction)
        st.write("Log base 2 Inverse Transformation Completed ✅")
    elif applied_transformation == "Square Root":
        prediction = np.power(prediction, 2)
        st.write("Square root Inverse Transformation Completed ✅")
    elif applied_transformation == "Other":
        prediction = prediction ** power_value
        st.write(f"power root of {power_value} Inverse Transformation Completed ✅")
    return prediction


def _render_prediction_section(
    evaluation_df,
    test_frame,
    submission_ids,
    target_name,
    applied_transformation,
    power_value,
):
    """Predict on the test data with a trained model and offer a CSV download.

    Nothing is rendered when there is no test data, and no prediction is made
    until at least one trained model is present in ``evaluation_df``.

    Args:
        evaluation_df: results table with a "model" column of fitted models.
        test_frame: preprocessed test features.
        submission_ids: identifier column taken from the uploaded test file.
        target_name: name used for the prediction column.
        applied_transformation: see ``_invert_target_transformation``.
        power_value: see ``_invert_target_transformation``.
    """
    if len(test_frame) == 0 or not _confirm("Predict") or len(evaluation_df) == 0:
        return

    last = len(evaluation_df) - 1
    chosen_row = st.number_input(
        "select index of best algorithm for test prediction",
        min_value=0,
        max_value=last,
        value=last,
    )
    test_prediction = evaluation_df.loc[chosen_row, "model"].predict(test_frame)
    test_prediction = _invert_target_transformation(
        test_prediction, applied_transformation, power_value
    )

    # Align the ids with the rows still present in the test data (rows may
    # have been removed by the "Drop Null Values" option).
    ids = submission_ids.loc[test_frame.index]
    submission_file = pd.DataFrame(
        index=pd.Index(ids), data=test_prediction, columns=[target_name]
    )
    st.write("Sample of Prediction File", submission_file.head())
    csv_prediction = submission_file.to_csv()
    if _confirm("Download Prediction File as CSV File ? "):
        st.download_button(
            label="Download Prediction CSV File",
            data=csv_prediction,
            file_name='prediction.csv',
            mime='text/csv',
        )


# Title with Rainbow Transition Effect and Neon Glow
# NOTE(review): the styling markup this comment describes is not present in
# the visible source; the literal is kept exactly as found.
html_code = """

LazyML

"""
st.markdown(html_code, unsafe_allow_html=True)

# file uploader
csv_upload = st.sidebar.file_uploader("Input CSV File for ML modelling", type=['csv'])
csv_upload2 = st.sidebar.file_uploader("Input CSV File of Test Data Prediction", type=["csv"])
test = pd.DataFrame()
submission_id = None

# Nothing can be done until a training file has been uploaded.
if csv_upload is None:
    st.stop()

# read the uploaded file into dataframe
df = pd.read_csv(csv_upload)
# saving the dataframe to a CSV file
df.to_csv('csv_upload.csv', index=False)
st.write("Train File uploaded successfully. ✅")

if csv_upload2 is not None:
    test = pd.read_csv(csv_upload2)
    id_col = st.selectbox("select column for submission i.e, ID", test.columns)
    submission_id = test[id_col]
if len(test) > 0:
    # saving the test dataframe to a CSV file
    test.to_csv('csv_upload_test.csv', index=False)
    st.write("Test File uploaded successfully. ✅")

if _confirm("Display Train Data"):
    st.dataframe(df.head())
if len(test) > 0 and _confirm("Display Test Data"):
    st.dataframe(test.head())

# Only the supervised workflow is implemented.
supervision = st.radio("Select Supervision Category", ["Supervised", "Un-Supervised"], index=0)
if supervision != "Supervised":
    st.stop()

# ------------------------------------------------------------------ target
selected_column = st.selectbox('Select Target column', df.columns, index=(len(df.columns) - 1))
# Display the selected column
st.write('You selected:', selected_column)
y = df[selected_column]

if y.dtype == "O":
    st.write("⚠️⚠️⚠️ Target Column is Object Type ⚠️⚠️⚠️")
    if _confirm("Proceed for Label Encoding "):
        le = LabelEncoder()
        # Keep the index and name so later alignment with X and the exported
        # CSV header stay consistent with the unencoded target.
        y = pd.Series(le.fit_transform(y), index=y.index, name=selected_column)
        st.write("Label Encoding Completed ✅")

if _confirm("Display Target Column"):
    st.dataframe(y.head())

# Label of the transformation that was really applied to ``y`` (None when the
# target is untouched); used later to invert predictions.
applied_transformation = None
power_value = None

select_target_trans = st.radio("Target column Transformation", YES_NO, index=1)
if select_target_trans == "Yes":
    selected_transformation = st.selectbox(
        "Select Transformation method",
        ["Log Transformation", "Power Transformation"],
    )
    if selected_transformation == "Log Transformation":
        if y.min() <= 0:
            st.write("Values in target columns are zeroes or negative, please select power transformation")
        else:
            log_choice = st.selectbox("Select Logarithmic method", list(LOG_TRANSFORMS))
            transform, done_message = LOG_TRANSFORMS[log_choice]
            y = transform(y)
            applied_transformation = log_choice
            st.write(done_message)
    elif selected_transformation == "Power Transformation":
        power_choice = st.selectbox("Select Power Transformation method", ["Square Root", "Other"])
        if power_choice == "Square Root":
            y = np.sqrt(y)
            applied_transformation = "Square Root"
            st.write("Square root Transformation Completed ✅")
        elif power_choice == "Other":
            power_value = st.number_input("Enter Power Value", value=3)
            if power_value == 0:
                # 1 / 0 is undefined; leave the target untouched.
                st.write("Power value must be non-zero, transformation skipped")
            else:
                y = y ** (1 / power_value)
                applied_transformation = "Other"
                st.write(f"power root of {power_value} Transformation Completed ✅")
    if _confirm("Display Target Column after Transformation"):
        st.dataframe(y.head())

# ---------------------------------------------------------------- features
X = df.drop(columns=selected_column)
if _confirm("Display X-Train Data"):
    st.dataframe(X.head())

if _confirm("Check for duplicate Values"):
    len_duplicates = int(X.duplicated().sum())
    if len_duplicates > 0:
        st.write(f"There are {len_duplicates} duplicate values in Train")
        if st.selectbox("Drop Duplicate values", YES_NO, index=1) == "Yes":
            X = X.drop_duplicates()
            st.write("Duplicate values removed ✅")
    else:
        st.write("There are no duplicate values in Train")

# dropping not important columns
if _confirm("Drop Un-Important Column(s)"):
    selected_drop_column = st.multiselect('Select columns to be dropped', X.columns)
    X = X.drop(columns=selected_drop_column)
    if len(test) > 0:
        # The test file may not contain every training column.
        test = test.drop(columns=selected_drop_column, errors="ignore")
    st.write("Un-Important column(s) Delected ✅")
    st.dataframe(X.head())

num_cols = X.select_dtypes(exclude="O").columns
cat_cols = X.select_dtypes(include="O").columns
st.write("Numerical Columns in Train Data: ", tuple(num_cols))
st.write("Categorical Columns in Train Data: ", tuple(cat_cols))

# ------------------------------------------------------ auto-optimized path
modelling_mode = st.radio("Select method for ML modelling", ["Manual", "Auto Optimized"], index=0)
if modelling_mode == "Auto Optimized":
    ml_cat_ao = st.radio("Select Machine Learning Category", ["Regression", "Classification"], index=0)
    if ml_cat_ao == "Regression":
        eva = "reg"
        candidates = models.Regression_models
    else:
        eva = "class"
        candidates = models.Classification_models
    st.write("Select ML algorithm")
    model_name = st.selectbox("select model", candidates.index)
    chosen_model = candidates.loc[model_name].values[0]
    # Rows may have been removed from X above; keep the target aligned.
    auto_optimizer.Auto_optimizer(X, y.loc[X.index], eva, chosen_model)
    st.stop()

# ------------------------------------------------- manual path: null values
if X.isnull().sum().sum() > 0:
    st.write("⚠️⚠️⚠️ There are missing values in Train Data ⚠️⚠️⚠️")
    null_strategy = st.selectbox(
        "Drop null values or Impute",
        ["Drop Null Values", "Impute Null Values"],
        index=1,
    )
    if null_strategy == "Drop Null Values":
        X = X.dropna()
        if len(test) > 0:
            st.write("⚠️⚠️⚠️ If choosing drop values, test dataset will also drop those values please choose missing value imputation method befittingly.⚠️⚠️⚠️ ")
            test = test.dropna()

    clean_num_nvh_df = pd.DataFrame()
    if X[num_cols].isnull().sum().sum() > 0:
        st.write("Numerical Columns with Percentage of Null Values: ")
        clean_num_nvh_df = _choose_null_methods(
            X,
            num_cols,
            null_value_handling.null_value_handling_method_num_cols,
            "Numerical",
            drop_unused=True,
        )
        st.write("Methods for Numerical columns null value handling", clean_num_nvh_df)

    clean_num_nvh_df_cat = pd.DataFrame()
    if X[cat_cols].isnull().sum().sum() > 0:
        st.write("Categorical Columns with Percentage of Null Values: ")
        clean_num_nvh_df_cat = _choose_null_methods(
            X,
            cat_cols,
            null_value_handling.null_value_handling_method_cat_cols,
            "Categorical",
            drop_unused=False,
        )
        st.write("Methods for Categorical columns null value handling", clean_num_nvh_df_cat)

    # NOTE(review): the return value is not used, so this presumably fills X
    # in place -- confirm against null_value_handling.null_handling.
    null_value_handling.null_handling(X, clean_num_nvh_df, clean_num_nvh_df_cat)
    st.write("X Data after Null value handling", X.head())
    csv = pd.concat([X, y.loc[X.index]], axis=1).to_csv(index=False)
    if _confirm("Download Null Value Handled DataFrame as CSV File ? "):
        st.download_button(
            label="Download Null Value Handled CSV File",
            data=csv,
            file_name='NVH_DataFrame.csv',
            mime='text/csv',
        )

# The test data is imputed whenever it contains nulls, even if the training
# data is complete, so that prediction does not fail on missing values.
if len(test) > 0:
    test_num_nulls = test[num_cols].isnull().sum()
    if test_num_nulls.sum() > 0:
        st.write("Columns with Null Value in Test", test_num_nulls[test_num_nulls > 0].index)
        test[num_cols] = IterativeImputer(max_iter=200, random_state=42).fit_transform(test[num_cols])
    test_cat_nulls = test[cat_cols].isnull().sum()
    if test_cat_nulls.sum() > 0:
        st.write("Categorical Columns with Null Value in Test", test_cat_nulls[test_cat_nulls > 0].index)
        test[cat_cols] = SimpleImputer(strategy="most_frequent").fit_transform(test[cat_cols])

# ------------------------------------------------------------------ encoding
ord_enc_cols = []
if len(cat_cols) == 0:
    st.write("No Categorical Columns in Train")
else:
    st.write("Select Columns for Ordinal Encoding")
    for column in cat_cols:
        if st.checkbox(column, key=f"ordinal_column_{column}"):
            st.write(f"No. of Unique value in {column} column are", X[column].nunique())
            ord_enc_cols.append(column)
# Keep the original column order so the one-hot output is deterministic.
ohe_enc_cols = [column for column in cat_cols if column not in ord_enc_cols]

if ord_enc_cols:
    st.write("ordinal encoded columns", tuple(ord_enc_cols))
if ohe_enc_cols:
    st.write("one hot encoded columns", tuple(ohe_enc_cols))

if ord_enc_cols and _confirm("proceed for ordinal encoding"):
    ordinal_order_vals = []
    for column in ord_enc_cols:
        unique_vals = X[column].unique()
        # A per-column key keeps the widgets distinct even when two columns
        # share exactly the same set of values.
        ordered_unique_vals = st.multiselect(
            "Select values in order for Ordinal Encoding",
            unique_vals,
            unique_vals,
            key=f"ordinal_order_{column}",
        )
        ordinal_order_vals.append(ordered_unique_vals)
    st.write("order of values for Ordinal Encoding", tuple(ordinal_order_vals))
    ordinal_encoder = OrdinalEncoder(
        categories=ordinal_order_vals,
        handle_unknown="use_encoded_value",
        unknown_value=-1,
    )
    X[ord_enc_cols] = ordinal_encoder.fit_transform(X[ord_enc_cols])
    if len(test) > 0:
        test[ord_enc_cols] = ordinal_encoder.transform(test[ord_enc_cols])
    st.write("DataFrame after Ordinal Encoding", X.head())
    st.write("Ordinal Encoding Completed ✅")

if ohe_enc_cols and _confirm("proceed for OnehotEncoding "):
    ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    encoded_train = pd.DataFrame(
        ohe.fit_transform(X[ohe_enc_cols]),
        columns=ohe.get_feature_names_out(),
        index=X.index,
    )
    X = pd.concat([X.drop(columns=ohe_enc_cols), encoded_train], axis=1)
    if len(test) > 0:
        encoded_test = pd.DataFrame(
            ohe.transform(test[ohe_enc_cols]),
            columns=ohe.get_feature_names_out(),
            index=test.index,
        )
        test = pd.concat([test.drop(columns=ohe_enc_cols), encoded_test], axis=1)
    st.write("DataFrame after One Hot Encoding", X.head())
    st.write("OneHot Encoding Completed ✅")

# Align y with the rows still present in X before exporting.
csv = pd.concat([X, y.loc[X.index]], axis=1).to_csv(index=False)
if _confirm("Download Encoded DataFrame as CSV File ? "):
    st.download_button(
        label="Download Ordinal Encoded CSV File",
        data=csv,
        file_name='Encoded_DataFrame.csv',
        mime='text/csv',
    )

# --------------------------------------------------- train/validation split
random_state = st.number_input("Enter Random_state", max_value=100, min_value=1, value=42)
test_size = st.number_input("Enter test_size", max_value=0.99, min_value=0.01, value=0.2)
tts_label = f"Train_Test_split, Default (Random_state = {random_state},Test_size = {test_size})"
split_choice = st.radio(
    "select Train Validation Split Method",
    [tts_label, "KFoldCV, Default (CV = 5)"],
    index=0,
)
ttsmethod = "Train_Test_split" if split_choice == tts_label else "KFoldCV"
st.write('You selected:', ttsmethod)

if ttsmethod != "Train_Test_split":
    # No K-fold code path exists; without this guard training would fail
    # with a NameError because no split variables are defined.
    st.warning("KFoldCV is not implemented yet, please select Train_Test_split.")
    st.stop()

X_train, X_Val, y_train, y_val = tts(
    X, y.loc[X.index], random_state=random_state, test_size=test_size
)
st.write('X-Training Data shape:', X_train.shape)
st.write('X-Validation Data shape:', X_Val.shape)

# ----------------------------------------------------------------- modelling
ml_cat = st.radio("Select Machine Learning Category", ["Regression", "Classification"], index=0)

if ml_cat == "Regression":
    method_name_selector = st.selectbox(
        "Select Error Evaluation Method", evaluationer.method_df.index, index=0
    )
    method = evaluationer.method_df.loc[method_name_selector].values[0]

    reg_algorithm = []
    for option in models.Regression_models.index:
        if st.checkbox(option, key=f"regression_model_{option}"):
            estimator = models.Regression_models.loc[option].iloc[0]
            estimator.set_params(**_collect_hyperparameters(estimator, option))
            reg_algorithm.append(estimator)

    if st.button("Train Regression Model"):
        for algorithm in reg_algorithm:
            evaluationer.evaluation(
                f"{algorithm} baseline",
                X_train, X_Val, y_train, y_val,
                algorithm, method, "reg",
            )
        st.write("Regression Model Trained Successfully", evaluationer.reg_evaluation_df)

    # Kept outside the button branch: a button is only True for one rerun,
    # while the results table lives in the evaluationer module.
    _render_prediction_section(
        evaluationer.reg_evaluation_df,
        test,
        submission_id,
        selected_column,
        applied_transformation,
        power_value,
    )

if ml_cat == "Classification":
    cla_algorithm = []
    for option in models.Classification_models.index:
        if st.checkbox(option, key=f"classification_model_{option}"):
            estimator = models.Classification_models.loc[option].iloc[0]
            estimator.set_params(**_collect_hyperparameters(estimator, option))
            cla_algorithm.append(estimator)

    if st.button("Train Classification Model"):
        method = None
        for algorithm in cla_algorithm:
            evaluationer.evaluation(
                f"{algorithm} baseline",
                X_train, X_Val, y_train, y_val,
                algorithm, method, eva="class",
            )
        st.write(
            "Classification Model Trained Successfully",
            evaluationer.classification_evaluation_df,
        )

    _render_prediction_section(
        evaluationer.classification_evaluation_df,
        test,
        submission_id,
        selected_column,
        applied_transformation,
        power_value,
    )