import os
import sys
from dataclasses import dataclass

import pandas as pd
from sklearn.model_selection import train_test_split

from src.exception import CustomException
from src.logger import logging
from src.components.data_transformation import DataTransformation, DataTransformationConfig
from src.components.model_trainer import ModelTrainerConfig, ModelTrainer


@dataclass
class DataIngestionConfig:
    """File locations used by the data-ingestion step."""

    # Where the training split is written.
    train_data_path: str = os.path.join('artifacts', 'train.csv')
    # Where the test split is written.
    test_data_path: str = os.path.join('artifacts', 'test.csv')
    # Where an unmodified copy of the input dataset is written.
    raw_data_path: str = os.path.join('artifacts', 'data.csv')
    # CSV file read as the input dataset. Declared last so that existing
    # positional construction with the three paths above keeps working.
    source_data_path: str = 'notebook/data/stud.csv'


class DataIngestion:
    """Read the source CSV, persist a raw copy, and write train/test splits."""

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def intiate_data_ingestion(self):
        """Load the source CSV and write the raw, train and test CSV files.

        The method name keeps its original (misspelled) form because it is
        the public entry point used by callers such as the script section
        at the bottom of this module.

        Returns:
            A ``(train_data_path, test_data_path)`` tuple of the paths the
            two splits were written to.

        Raises:
            CustomException: wraps any exception raised while reading,
                splitting or writing the data.
        """
        logging.info("Entered the data ingestion method or component")
        try:
            config = self.ingestion_config

            df = pd.read_csv(config.source_data_path)
            logging.info('read the dataset as dataframe')

            # Create the parent directory of every output file, not only the
            # training file, so customised paths in different folders work.
            # An empty dirname means "current directory" and must be skipped,
            # because os.makedirs('') raises FileNotFoundError.
            for out_path in (
                config.raw_data_path,
                config.train_data_path,
                config.test_data_path,
            ):
                parent_dir = os.path.dirname(out_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

            # Save the raw data before splitting.
            df.to_csv(config.raw_data_path, index=False, header=True)

            logging.info('Train test split initiated')
            # Fixed random_state keeps the 80/20 split reproducible.
            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

            train_set.to_csv(config.train_data_path, index=False, header=True)
            test_set.to_csv(config.test_data_path, index=False, header=True)

            logging.info("ingestion of data completed")

            return (
                config.train_data_path,
                config.test_data_path,
            )
        except Exception as e:
            # Chain explicitly so the original traceback stays attached.
            raise CustomException(e, sys) from e


if __name__ == '__main__':
    # Run the pipeline end to end: ingestion -> transformation -> training.
    obj = DataIngestion()
    train_data_path, test_data_path = obj.intiate_data_ingestion()

    data_transformation = DataTransformation()
    # The third returned value is not used by this script.
    train_arr, test_arr, _ = data_transformation.initiate_data_tranformation(
        train_data_path, test_data_path
    )

    modelTrainer = ModelTrainer()
    print(modelTrainer.initiate_model_trainer(train_array=train_arr, test_array=test_arr))