student-performance / src /components /data_ingestion.py
singhjagpreet's picture
model trainer
13993ca
raw
history blame
No virus
2.22 kB
import os
import sys
from src.exception import CustomException
from src.logger import logging
import pandas as pd
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
from src.components.data_transformation import DataTransformation,DataTransformationConfig
from src.components.model_trainer import ModelTrainerConfig,ModelTrainer
# Directory under which every ingestion output file is written.
_ARTIFACTS_DIR = 'artifacts'


@dataclass
class DataIngestionConfig:
    """File locations used by the data ingestion step.

    Each field defaults to a CSV file inside the ``artifacts`` directory and
    can be overridden per instance through the generated ``__init__``.
    """

    # Destination of the training split.
    train_data_path: str = os.path.join(_ARTIFACTS_DIR, 'train.csv')
    # Destination of the test split.
    test_data_path: str = os.path.join(_ARTIFACTS_DIR, 'test.csv')
    # Destination of the unsplit copy of the source dataset.
    raw_data_path: str = os.path.join(_ARTIFACTS_DIR, 'data.csv')
class DataIngestion:
    """Load the source dataset and write raw, train and test CSV files.

    Output locations are taken from a ``DataIngestionConfig`` instance stored
    on ``self.ingestion_config``.
    """

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def intiate_data_ingestion(
        self,
        source_path: str = 'notebook/data/stud.csv',
        test_size: float = 0.2,
        random_state: int = 42,
    ):
        """Read the dataset, save a raw copy, and split it into train/test files.

        The method name keeps its original spelling so existing callers keep
        working.

        Args:
            source_path: CSV file to ingest. Defaults to the path that was
                previously hard-coded.
            test_size: Passed to ``train_test_split`` as ``test_size``.
            random_state: Passed to ``train_test_split`` as ``random_state``.

        Returns:
            A ``(train_data_path, test_data_path)`` tuple taken from the
            ingestion config.

        Raises:
            CustomException: Wraps any exception raised while reading,
                splitting or writing the data.
        """
        logging.info("Entered the data ingestion method or component")
        try:
            df = pd.read_csv(source_path)
            logging.info('read the dataset as dataframe')

            config = self.ingestion_config

            # Create the parent directory of every output file, not only the
            # train file, so a config that spreads outputs across directories
            # still works. An empty parent means "current directory", which
            # os.makedirs would reject, so it is skipped.
            for out_path in (
                config.raw_data_path,
                config.train_data_path,
                config.test_data_path,
            ):
                parent_dir = os.path.dirname(out_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

            # Keep an unsplit copy of the dataset next to the splits.
            df.to_csv(config.raw_data_path, index=False, header=True)

            logging.info('Train test split initiated')
            train_set, test_set = train_test_split(
                df, test_size=test_size, random_state=random_state
            )
            train_set.to_csv(config.train_data_path, index=False, header=True)
            test_set.to_csv(config.test_data_path, index=False, header=True)
            logging.info("ingestion of data completed")

            return (
                config.train_data_path,
                config.test_data_path,
            )
        except Exception as e:
            # Chain explicitly so the original error stays attached as the cause.
            raise CustomException(e, sys) from e
if __name__ == '__main__':
    # Step 1: ingest the dataset and obtain the train/test CSV locations.
    ingestion = DataIngestion()
    train_csv, test_csv = ingestion.intiate_data_ingestion()

    # Step 2: transform both splits; the third returned value is not used here.
    transformer = DataTransformation()
    train_matrix, test_matrix, _unused = transformer.initiate_data_tranformation(
        train_csv, test_csv
    )

    # Step 3: run the model trainer and print whatever it returns.
    trainer = ModelTrainer()
    training_result = trainer.initiate_model_trainer(
        train_array=train_matrix, test_array=test_matrix
    )
    print(training_result)