import os import sys from dataclasses import dataclass from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from sklearn.linear_model import LinearRegression,Ridge,Lasso from sklearn.tree import DecisionTreeRegressor from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor,AdaBoostRegressor from xgboost import XGBRegressor from sklearn.neighbors import KNeighborsRegressor from catboost import CatBoostRegressor from src.exception import CustomException from src.logger import logging from src.utils import save_object,evaluate_models @dataclass class ModelTrainerConfig: trained_model_file_path = os.path.join('artifacts','model.pkl') class ModelTrainer: def __init__(self) -> None: self.model_trainer_config = ModelTrainerConfig() def initiate_model_trainer(self,train_array, test_array): try: logging.info('spliting training and test input data') X_train, y_train, X_test, y_test= ( train_array[:,:-1], train_array[:,-1], test_array[:,:-1], test_array[:,-1] ) models = { "Random Forest": RandomForestRegressor(), "Decision Tree": DecisionTreeRegressor(), "Gradient Boosting": GradientBoostingRegressor(), "Linear Regression": LinearRegression(), "XGBRegressor": XGBRegressor(), "CatBoosting Regressor": CatBoostRegressor(verbose=False), "AdaBoost Regressor": AdaBoostRegressor(), } params={ "Decision Tree": { 'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'], # 'splitter':['best','random'], # 'max_features':['sqrt','log2'], }, "Random Forest":{ # 'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'], # 'max_features':['sqrt','log2',None], 'n_estimators': [8,16,32,64,128,256] }, "Gradient Boosting":{ # 'loss':['squared_error', 'huber', 'absolute_error', 'quantile'], 'learning_rate':[.1,.01,.05,.001], 'subsample':[0.6,0.7,0.75,0.8,0.85,0.9], # 'criterion':['squared_error', 'friedman_mse'], # 'max_features':['auto','sqrt','log2'], 'n_estimators': [8,16,32,64,128,256] }, "Linear Regression":{}, "XGBRegressor":{ 'learning_rate':[.1,.01,.05,.001], 'n_estimators': [8,16,32,64,128,256] }, "CatBoosting Regressor":{ 'depth': [6,8,10], 'learning_rate': [0.01, 0.05, 0.1], 'iterations': [30, 50, 100] }, "AdaBoost Regressor":{ 'learning_rate':[.1,.01,0.5,.001], # 'loss':['linear','square','exponential'], 'n_estimators': [8,16,32,64,128,256] } } logging.info('training models') model_report:dict=evaluate_models(X_train=X_train,y_train=y_train,X_test=X_test,y_test=y_test, models=models,params=params) logging.info("model trained") best_model_score = max(sorted(model_report.values())) best_model_name = list(model_report.keys())[ list(model_report.values()).index(best_model_score) ] best_model = models[best_model_name] if best_model_score < 0.6: raise CustomException("NO best model found") logging.info("best model found") save_object( file_path=self.model_trainer_config.trained_model_file_path, obj=best_model ) predicted = best_model.predict(X_test) r2 = r2_score(y_test,predicted) return r2 except Exception as e: raise CustomException(e,sys)