|
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, Lasso, ElasticNet
|
|
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor
|
|
from sklearn.neighbors import KNeighborsRegressor
|
|
from sklearn.tree import DecisionTreeRegressor
|
|
from sklearn.svm import SVR
|
|
from xgboost import XGBRegressor, XGBRFRegressor
|
|
from sklearn.neural_network import MLPRegressor
|
|
from lightgbm import LGBMRegressor
|
|
from sklearn.naive_bayes import GaussianNB
|
|
from sklearn.model_selection import GridSearchCV
|
|
from sklearn.datasets import make_regression
|
|
from sklearn.model_selection import train_test_split
|
|
import streamlit as st
|
|
import evaluationer
|
|
|
|
from sklearn.metrics import root_mean_squared_error
|
|
|
|
from sklearn.linear_model import LogisticRegression, SGDClassifier, RidgeClassifier
|
|
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
|
|
from sklearn.neighbors import KNeighborsClassifier
|
|
from sklearn.tree import DecisionTreeClassifier
|
|
from sklearn.svm import SVC
|
|
from xgboost import XGBClassifier, XGBRFClassifier
|
|
from sklearn.neural_network import MLPClassifier
|
|
from lightgbm import LGBMClassifier
|
|
from sklearn.naive_bayes import MultinomialNB, CategoricalNB
|
|
|
|
# Hyper-parameter search spaces for the classifiers, keyed by the same display
# names used in the `classifiers` registry. Each value is a GridSearchCV-style
# ``param_grid`` mapping (parameter name -> list of candidate values).
param_grids_class = {
    "Logistic Regression": {
        # `None` (not the string 'none') disables regularisation; the string
        # spelling is no longer accepted by current scikit-learn releases.
        # NOTE(review): not every penalty/solver pair is supported by
        # scikit-learn; unsupported pairs fail at fit time and are scored by
        # GridSearchCV according to its `error_score` setting.
        "penalty": ["l1", "l2", "elasticnet", None],
        "C": [0.01, 0.1, 1, 10],
        "solver": ["lbfgs", "liblinear", "saga"],
    },
    "SGD Classifier": {
        # 'log_loss' is the current name of the logistic loss (formerly 'log').
        "loss": ["hinge", "log_loss", "modified_huber", "squared_hinge"],
        "penalty": ["l2", "l1", "elasticnet"],
        "alpha": [0.0001, 0.001, 0.01],
        "max_iter": [1000, 5000, 10000],
    },
    "Ridge Classifier": {
        "alpha": [0.1, 1, 10, 100],
    },
    "Random Forest Classifier": {
        "n_estimators": [100, 200, 300],
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "AdaBoost Classifier": {
        "n_estimators": [50, 100, 200],
        "learning_rate": [0.01, 0.1, 1],
    },
    "Gradient Boosting Classifier": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
    },
    "Hist Gradient Boosting Classifier": {
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [None, 10, 20],
        "min_samples_leaf": [20, 50, 100],
    },
    "K Neighbors Classifier": {
        "n_neighbors": [3, 5, 7],
        "weights": ["uniform", "distance"],
        "metric": ["euclidean", "manhattan"],
    },
    "Decision Tree Classifier": {
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "SVC": {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "poly", "rbf"],
        "degree": [3, 4, 5],
        "gamma": ["scale", "auto"],
    },
    "XGB Classifier": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
    },
    "XGBRF Classifier": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
    },
    "MLP Classifier": {
        "hidden_layer_sizes": [(50,), (100,), (50, 50)],
        "activation": ["tanh", "relu"],
        "solver": ["adam", "sgd"],
        "alpha": [0.0001, 0.001, 0.01],
        "learning_rate": ["constant", "adaptive"],
    },
    "LGBM Classifier": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [-1, 10, 20],
    },
    "Multinomial Naive Bayes": {
        "alpha": [0.1, 0.5, 1.0],
    },
    "Categorical Naive Bayes": {
        "alpha": [0.1, 0.5, 1.0],
    },
}
|
|
|
|
# Hyper-parameter search spaces for the regressors, keyed by the same display
# names used in the `regressors` registry. Each value is a GridSearchCV-style
# ``param_grid`` mapping (parameter name -> list of candidate values).
param_grids_reg = {
    # Empty grid: the search evaluates only the estimator's current settings.
    "Linear Regression": {},
    "SGD Regressor": {
        # 'squared_error' is the current name of the least-squares loss
        # (formerly 'squared_loss').
        "loss": ["squared_error", "huber"],
        "penalty": ["l2", "l1", "elasticnet"],
        "alpha": [0.0001, 0.001, 0.01],
        "max_iter": [1000, 5000, 10000],
    },
    "Ridge Regressor": {
        "alpha": [0.1, 1, 10, 100],
        "solver": ["auto", "svd", "cholesky", "lsqr"],
    },
    "Lasso Regressor": {
        "alpha": [0.1, 1, 10, 100],
    },
    "ElasticNet Regressor": {
        "alpha": [0.1, 1, 10, 100],
        "l1_ratio": [0.1, 0.5, 0.9],
    },
    "Random Forest Regressor": {
        "n_estimators": [100, 200, 300],
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "AdaBoost Regressor": {
        "n_estimators": [50, 100, 200],
        "learning_rate": [0.01, 0.1, 1],
    },
    "Gradient Boosting Regressor": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
    },
    "Hist Gradient Boosting Regressor": {
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [None, 10, 20],
        "min_samples_leaf": [20, 50, 100],
    },
    "K Neighbors Regressor": {
        "n_neighbors": [3, 5, 7],
        "weights": ["uniform", "distance"],
        "metric": ["euclidean", "manhattan"],
    },
    "Decision Tree Regressor": {
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "SVR": {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "poly", "rbf"],
        "degree": [3, 4, 5],
        "gamma": ["scale", "auto"],
    },
    "XGB Regressor": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
    },
    "XGBRF Regressor": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
    },
    "MLP Regressor": {
        "hidden_layer_sizes": [(50,), (100,), (50, 50)],
        "activation": ["tanh", "relu"],
        "solver": ["adam", "sgd"],
        "alpha": [0.0001, 0.001, 0.01],
        "learning_rate": ["constant", "adaptive"],
    },
    "LGBM Regressor": {
        "n_estimators": [100, 200, 300],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [-1, 10, 20],
    },
    "Gaussian Naive Bayes": {
        "var_smoothing": [1e-9, 1e-8, 1e-7],
    },
}
|
|
|
|
|
|
# Registry of default-constructed regression estimators, keyed by the display
# name used to look them up (and to look up their grid in `param_grids_reg`).
# Each class is instantiated once, in listing order, when the module loads.
# NOTE(review): GaussianNB comes from sklearn.naive_bayes and is normally used
# as a classifier; confirm it is really meant to be offered for regression.
regressors = {
    label: estimator_cls()
    for label, estimator_cls in (
        ("Linear Regression", LinearRegression),
        ("SGD Regressor", SGDRegressor),
        ("Ridge Regressor", Ridge),
        ("Lasso Regressor", Lasso),
        ("ElasticNet Regressor", ElasticNet),
        ("Random Forest Regressor", RandomForestRegressor),
        ("AdaBoost Regressor", AdaBoostRegressor),
        ("Gradient Boosting Regressor", GradientBoostingRegressor),
        ("Hist Gradient Boosting Regressor", HistGradientBoostingRegressor),
        ("K Neighbors Regressor", KNeighborsRegressor),
        ("Decision Tree Regressor", DecisionTreeRegressor),
        ("SVR", SVR),
        ("XGB Regressor", XGBRegressor),
        ("XGBRF Regressor", XGBRFRegressor),
        ("MLP Regressor", MLPRegressor),
        ("LGBM Regressor", LGBMRegressor),
        ("Gaussian Naive Bayes", GaussianNB),
    )
}
|
|
|
|
# Registry of default-constructed classification estimators, keyed by the
# display name used to look them up (and to look up their grid in
# `param_grids_class`). Each class is instantiated once, in listing order,
# when the module loads.
classifiers = {
    label: estimator_cls()
    for label, estimator_cls in (
        ("Logistic Regression", LogisticRegression),
        ("SGD Classifier", SGDClassifier),
        ("Ridge Classifier", RidgeClassifier),
        ("Random Forest Classifier", RandomForestClassifier),
        ("AdaBoost Classifier", AdaBoostClassifier),
        ("Gradient Boosting Classifier", GradientBoostingClassifier),
        ("Hist Gradient Boosting Classifier", HistGradientBoostingClassifier),
        ("K Neighbors Classifier", KNeighborsClassifier),
        ("Decision Tree Classifier", DecisionTreeClassifier),
        ("SVC", SVC),
        ("XGB Classifier", XGBClassifier),
        ("XGBRF Classifier", XGBRFClassifier),
        ("MLP Classifier", MLPClassifier),
        ("LGBM Classifier", LGBMClassifier),
        ("Multinomial Naive Bayes", MultinomialNB),
        ("Categorical Naive Bayes", CategoricalNB),
    )
}
|
|
def perform_grid_search(model, model_name, X_train, X_test, y_train, y_test, eva):
    """Tune ``model_name`` with a 5-fold grid search and report the result.

    The estimator and its parameter grid are looked up by ``model_name`` in
    the module-level registries (``regressors`` / ``param_grids_reg`` for
    ``eva == "reg"``, ``classifiers`` / ``param_grids_class`` for
    ``eva == "class"``). The best parameters and best cross-validation score
    are written to the Streamlit page, and the tuned estimator is then handed
    to ``evaluationer.evaluation`` under the label ``"best hyperparams"``.

    Args:
        model: Kept for backward compatibility with existing callers. The
            estimator that is searched and evaluated is the one registered
            under ``model_name``, so this argument is not used.
        model_name: Display name of the estimator; must be a key of the
            registry selected by ``eva``.
        X_train: Training features used for the cross-validated search.
        X_test: Held-out features forwarded to ``evaluationer.evaluation``.
        y_train: Training targets.
        y_test: Held-out targets forwarded to ``evaluationer.evaluation``.
        eva: Task type, ``"reg"`` for regression or ``"class"`` for
            classification. Any other value makes the function a no-op.

    Returns:
        None. Results are reported through Streamlit and ``evaluationer``.

    Raises:
        KeyError: If ``model_name`` is not present in the selected registry.
    """
    if eva == "reg":
        estimator = regressors[model_name]
        param_grid = param_grids_reg[model_name]
        scoring = 'neg_mean_squared_error'
    elif eva == "class":
        estimator = classifiers[model_name]
        param_grid = param_grids_class[model_name]
        scoring = 'accuracy'
    else:
        # Unrecognised task type: nothing to do (matches previous behaviour).
        return

    grid_search = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5, scoring=scoring)
    grid_search.fit(X_train, y_train)

    st.write(f"Best Parameters for {model_name}: {grid_search.best_params_}")
    st.write(f"Best Score for {model_name}: {grid_search.best_score_}")

    # Evaluate the estimator that carries the best parameters found by the
    # search. Previously the untuned `model` argument was passed here, so the
    # "best hyperparams" entry never reflected the tuning result.
    # NOTE(review): root_mean_squared_error is forwarded for both task types,
    # as before; how evaluationer uses it for classification is not visible
    # from this module.
    evaluationer.evaluation(
        "best hyperparams",
        X_train,
        X_test,
        y_train,
        y_test,
        grid_search.best_estimator_,
        root_mean_squared_error,
        eva,
    )
|
|
|