student-performance/src/components/data_ingestion.py
singhjagpreet's picture
model trainer
13993ca
raw
history blame contribute delete
No virus
2.22 kB
import os
import sys
from src.exception import CustomException
from src.logger import logging
import pandas as pd
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
from src.components.data_transformation import DataTransformation,DataTransformationConfig
from src.components.model_trainer import ModelTrainerConfig,ModelTrainer
@dataclass
class DataIngestionConfig:
    """Default output locations for the CSV files produced by data ingestion.

    Every path defaults to a file inside the ``artifacts`` directory and can
    be overridden per instance through the generated constructor.
    """

    # Destination of the training split.
    train_data_path: str = os.path.join("artifacts", "train.csv")
    # Destination of the test split.
    test_data_path: str = os.path.join("artifacts", "test.csv")
    # Destination of the unsplit copy of the dataset.
    raw_data_path: str = os.path.join("artifacts", "data.csv")
class DataIngestion:
    """Load the source dataset and write raw, train and test CSV files.

    Output locations are taken from a ``DataIngestionConfig`` instance stored
    on ``self.ingestion_config``.
    """

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def intiate_data_ingestion(self, source_path: str = 'notebook/data/stud.csv'):
        """Read the dataset, save a raw copy and an 80/20 train/test split.

        Args:
            source_path: CSV file to read. Defaults to the path that was
                previously hard-coded, so existing callers are unaffected.

        Returns:
            A ``(train_data_path, test_data_path)`` tuple holding the paths
            the two splits were written to.

        Raises:
            CustomException: wraps any exception raised while reading,
                splitting or writing the data; the original exception is
                attached as ``__cause__``.
        """
        logging.info("Entered the data ingestion method or component")
        try:
            df = pd.read_csv(source_path)
            logging.info('read the dataset as dataframe')

            config = self.ingestion_config

            # The three outputs may be configured to live in different
            # directories, so make sure each parent directory exists.
            for out_path in (
                config.raw_data_path,
                config.train_data_path,
                config.test_data_path,
            ):
                out_dir = os.path.dirname(out_path)
                # A bare filename has an empty dirname, and os.makedirs('')
                # raises FileNotFoundError; nothing needs creating then.
                if out_dir:
                    os.makedirs(out_dir, exist_ok=True)

            # Keep an unsplit copy of the data alongside the splits.
            df.to_csv(config.raw_data_path, index=False, header=True)

            logging.info('Train test split initiated')
            # Fixed random_state keeps the split reproducible across runs.
            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
            train_set.to_csv(config.train_data_path, index=False, header=True)
            test_set.to_csv(config.test_data_path, index=False, header=True)
            logging.info("ingestion of data completed")

            return (
                config.train_data_path,
                config.test_data_path,
            )
        except Exception as e:
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise CustomException(e, sys) from e
if __name__ == '__main__':
    # Script entry point: run ingestion, transformation and model training
    # in sequence, feeding each stage's output into the next.
    ingestion = DataIngestion()
    train_csv, test_csv = ingestion.intiate_data_ingestion()

    transformer = DataTransformation()
    # The transformation step returns three values; the third is not used here.
    train_arr, test_arr, _ = transformer.initiate_data_tranformation(
        train_csv,
        test_csv,
    )

    trainer = ModelTrainer()
    outcome = trainer.initiate_model_trainer(
        train_array=train_arr,
        test_array=test_arr,
    )
    # Print whatever the trainer returns.
    print(outcome)