File size: 97,011 Bytes
3c8c0e4 |
|
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "92e48866",
"metadata": {},
"source": [
"## Model Training"
]
},
{
"cell_type": "markdown",
"id": "25791a74",
"metadata": {},
"source": [
"#### 1.1 Import Data and Required Packages\n",
"##### Importing Pandas, NumPy, Matplotlib, Seaborn and the Warnings library."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b080dfb2",
"metadata": {},
"outputs": [],
"source": [
"# Basic Import\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Modelling\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"from sklearn.neighbors import KNeighborsRegressor\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n",
"from sklearn.svm import SVR\n",
"from sklearn.linear_model import LinearRegression, Ridge, Lasso\n",
"from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"from catboost import CatBoostRegressor\n",
"from xgboost import XGBRegressor\n",
"import warnings"
]
},
{
"cell_type": "markdown",
"id": "e45079ad",
"metadata": {},
"source": [
"#### Import the CSV Data as Pandas DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e11c6255",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('data/stud.csv')"
]
},
{
"cell_type": "markdown",
"id": "20634923",
"metadata": {},
"source": [
"#### Show Top 5 Records"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e7e412a2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>race_ethnicity</th>\n",
" <th>parental_level_of_education</th>\n",
" <th>lunch</th>\n",
" <th>test_preparation_course</th>\n",
" <th>math_score</th>\n",
" <th>reading_score</th>\n",
" <th>writing_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>female</td>\n",
" <td>group B</td>\n",
" <td>bachelor's degree</td>\n",
" <td>standard</td>\n",
" <td>none</td>\n",
" <td>72</td>\n",
" <td>72</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>female</td>\n",
" <td>group C</td>\n",
" <td>some college</td>\n",
" <td>standard</td>\n",
" <td>completed</td>\n",
" <td>69</td>\n",
" <td>90</td>\n",
" <td>88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>female</td>\n",
" <td>group B</td>\n",
" <td>master's degree</td>\n",
" <td>standard</td>\n",
" <td>none</td>\n",
" <td>90</td>\n",
" <td>95</td>\n",
" <td>93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>male</td>\n",
" <td>group A</td>\n",
" <td>associate's degree</td>\n",
" <td>free/reduced</td>\n",
" <td>none</td>\n",
" <td>47</td>\n",
" <td>57</td>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>male</td>\n",
" <td>group C</td>\n",
" <td>some college</td>\n",
" <td>standard</td>\n",
" <td>none</td>\n",
" <td>76</td>\n",
" <td>78</td>\n",
" <td>75</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender race_ethnicity parental_level_of_education lunch \\\n",
"0 female group B bachelor's degree standard \n",
"1 female group C some college standard \n",
"2 female group B master's degree standard \n",
"3 male group A associate's degree free/reduced \n",
"4 male group C some college standard \n",
"\n",
" test_preparation_course math_score reading_score writing_score \n",
"0 none 72 72 74 \n",
"1 completed 69 90 88 \n",
"2 none 90 95 93 \n",
"3 none 47 57 44 \n",
"4 none 76 78 75 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "fbd32281",
"metadata": {},
"source": [
"#### Preparing X and Y variables"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "56d72fde",
"metadata": {},
"outputs": [],
"source": [
"# Feature frame: every column of df except the math_score target\n",
"X = df.drop(columns=\"math_score\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "cd613177",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>race_ethnicity</th>\n",
" <th>parental_level_of_education</th>\n",
" <th>lunch</th>\n",
" <th>test_preparation_course</th>\n",
" <th>reading_score</th>\n",
" <th>writing_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>female</td>\n",
" <td>group B</td>\n",
" <td>bachelor's degree</td>\n",
" <td>standard</td>\n",
" <td>none</td>\n",
" <td>72</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>female</td>\n",
" <td>group C</td>\n",
" <td>some college</td>\n",
" <td>standard</td>\n",
" <td>completed</td>\n",
" <td>90</td>\n",
" <td>88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>female</td>\n",
" <td>group B</td>\n",
" <td>master's degree</td>\n",
" <td>standard</td>\n",
" <td>none</td>\n",
" <td>95</td>\n",
" <td>93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>male</td>\n",
" <td>group A</td>\n",
" <td>associate's degree</td>\n",
" <td>free/reduced</td>\n",
" <td>none</td>\n",
" <td>57</td>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>male</td>\n",
" <td>group C</td>\n",
" <td>some college</td>\n",
" <td>standard</td>\n",
" <td>none</td>\n",
" <td>78</td>\n",
" <td>75</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender race_ethnicity parental_level_of_education lunch \\\n",
"0 female group B bachelor's degree standard \n",
"1 female group C some college standard \n",
"2 female group B master's degree standard \n",
"3 male group A associate's degree free/reduced \n",
"4 male group C some college standard \n",
"\n",
" test_preparation_course reading_score writing_score \n",
"0 none 72 74 \n",
"1 completed 90 88 \n",
"2 none 95 93 \n",
"3 none 57 44 \n",
"4 none 78 75 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f237ea14",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Categories in 'gender' variable: ['female' 'male']\n",
"Categories in 'race_ethnicity' variable: ['group B' 'group C' 'group A' 'group D' 'group E']\n",
"Categories in'parental level of education' variable: [\"bachelor's degree\" 'some college' \"master's degree\" \"associate's degree\"\n",
" 'high school' 'some high school']\n",
"Categories in 'lunch' variable: ['standard' 'free/reduced']\n",
"Categories in 'test preparation course' variable: ['none' 'completed']\n"
]
}
],
"source": [
"# (label, column) pairs; each label is printed exactly as written here\n",
"category_labels = [\n",
"    (\"Categories in 'gender' variable: \", \"gender\"),\n",
"    (\"Categories in 'race_ethnicity' variable: \", \"race_ethnicity\"),\n",
"    (\n",
"        \"Categories in'parental level of education' variable:\",\n",
"        \"parental_level_of_education\",\n",
"    ),\n",
"    (\"Categories in 'lunch' variable: \", \"lunch\"),\n",
"    (\n",
"        \"Categories in 'test preparation course' variable: \",\n",
"        \"test_preparation_course\",\n",
"    ),\n",
"]\n",
"\n",
"# Print the distinct values of each categorical column, one line per column\n",
"for label, column in category_labels:\n",
"    print(label, end=\" \")\n",
"    print(df[column].unique())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "924b7f9d",
"metadata": {},
"outputs": [],
"source": [
"y = df[\"math_score\"]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ffc69816",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 72\n",
"1 69\n",
"2 90\n",
"3 47\n",
"4 76\n",
" ..\n",
"995 88\n",
"996 62\n",
"997 59\n",
"998 68\n",
"999 77\n",
"Name: math_score, Length: 1000, dtype: int64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1e290fe3",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
"\n",
"# Split the feature columns by dtype: object columns are handled as\n",
"# categorical, every other column as numeric.\n",
"cat_features = X.select_dtypes(include=\"object\").columns\n",
"num_features = X.select_dtypes(exclude=\"object\").columns\n",
"\n",
"# Two transformers: one-hot encoding for the categorical columns and\n",
"# standard scaling for the numeric ones.\n",
"oh_transformer = OneHotEncoder()\n",
"numeric_transformer = StandardScaler()\n",
"\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        (\"OneHotEncoder\", oh_transformer, cat_features),\n",
"        (\"StandardScaler\", numeric_transformer, num_features),\n",
"    ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "9c68f99a",
"metadata": {},
"outputs": [],
"source": [
"# Fit on the original feature columns taken straight from `df` rather than\n",
"# from `X`, so re-running this cell does not feed the already-transformed\n",
"# array back into the ColumnTransformer (which selects columns by name and\n",
"# therefore needs a DataFrame). On a first run the result is unchanged.\n",
"# NOTE(review): the preprocessor is fitted on all rows before the\n",
"# train/test split further down, so the scaler statistics include the\n",
"# test rows; consider fitting on the training split only.\n",
"X = preprocessor.fit_transform(df.drop(columns=\"math_score\"))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "1f57b3ec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1. , 0. , 0. , ..., 1. ,\n",
" 0.19399858, 0.39149181],\n",
" [ 1. , 0. , 0. , ..., 0. ,\n",
" 1.42747598, 1.31326868],\n",
" [ 1. , 0. , 0. , ..., 1. ,\n",
" 1.77010859, 1.64247471],\n",
" ...,\n",
" [ 1. , 0. , 0. , ..., 0. ,\n",
" 0.12547206, -0.20107904],\n",
" [ 1. , 0. , 0. , ..., 0. ,\n",
" 0.60515772, 0.58901542],\n",
" [ 1. , 0. , 0. , ..., 1. ,\n",
" 1.15336989, 1.18158627]])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "72459f1d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1000, 19)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ed5c4e99",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((800, 19), (200, 19))"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 20% of the rows as the test set; the fixed random_state keeps\n",
"# the split identical from run to run.\n",
"split_parts = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"X_train, X_test, y_train, y_test = split_parts\n",
"\n",
"# Shapes of the train and test feature matrices\n",
"(X_train.shape, X_test.shape)"
]
},
{
"cell_type": "markdown",
"id": "4cd80317",
"metadata": {},
"source": [
"#### Create an Evaluate Function to give all metrics after model Training"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "8c247bd0",
"metadata": {},
"outputs": [],
"source": [
"def evaluate_model(true, predicted):\n",
"    \"\"\"Score predictions against the true target values.\n",
"\n",
"    Returns a tuple ``(mae, rmse, r2_square)``: the mean absolute error,\n",
"    the square root of the mean squared error, and the R2 score, in that\n",
"    order.\n",
"    \"\"\"\n",
"    squared_error = mean_squared_error(true, predicted)\n",
"    return (\n",
"        mean_absolute_error(true, predicted),\n",
"        np.sqrt(squared_error),\n",
"        r2_score(true, predicted),\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "79ccb8e7",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Linear Regression\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 5.3231\n",
"- Mean Absolute Error: 4.2667\n",
"- R2 Score: 0.8743\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 5.3940\n",
"- Mean Absolute Error: 4.2148\n",
"- R2 Score: 0.8804\n",
"===================================\n",
"\n",
"\n",
"Lasso\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 6.5938\n",
"- Mean Absolute Error: 5.2063\n",
"- R2 Score: 0.8071\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 6.5197\n",
"- Mean Absolute Error: 5.1579\n",
"- R2 Score: 0.8253\n",
"===================================\n",
"\n",
"\n",
"Ridge\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 5.3233\n",
"- Mean Absolute Error: 4.2650\n",
"- R2 Score: 0.8743\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 5.3904\n",
"- Mean Absolute Error: 4.2111\n",
"- R2 Score: 0.8806\n",
"===================================\n",
"\n",
"\n",
"K-Neighbors Regressor\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 5.7079\n",
"- Mean Absolute Error: 4.5168\n",
"- R2 Score: 0.8555\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 7.2530\n",
"- Mean Absolute Error: 5.6210\n",
"- R2 Score: 0.7838\n",
"===================================\n",
"\n",
"\n",
"Decision Tree\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 0.2795\n",
"- Mean Absolute Error: 0.0187\n",
"- R2 Score: 0.9997\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 7.9294\n",
"- Mean Absolute Error: 6.4050\n",
"- R2 Score: 0.7416\n",
"===================================\n",
"\n",
"\n",
"Random Forest Regressor\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 2.3125\n",
"- Mean Absolute Error: 1.8477\n",
"- R2 Score: 0.9763\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 5.9646\n",
"- Mean Absolute Error: 4.6275\n",
"- R2 Score: 0.8538\n",
"===================================\n",
"\n",
"\n",
"XGBRegressor\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 1.0073\n",
"- Mean Absolute Error: 0.6875\n",
"- R2 Score: 0.9955\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 6.4733\n",
"- Mean Absolute Error: 5.0577\n",
"- R2 Score: 0.8278\n",
"===================================\n",
"\n",
"\n",
"CatBoosting Regressor\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 3.0427\n",
"- Mean Absolute Error: 2.4054\n",
"- R2 Score: 0.9589\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 6.0086\n",
"- Mean Absolute Error: 4.6125\n",
"- R2 Score: 0.8516\n",
"===================================\n",
"\n",
"\n",
"AdaBoost Regressor\n",
"Model performance for Training set\n",
"- Root Mean Squared Error: 5.8340\n",
"- Mean Absolute Error: 4.7767\n",
"- R2 Score: 0.8490\n",
"----------------------------------\n",
"Model performance for Test set\n",
"- Root Mean Squared Error: 6.0478\n",
"- Mean Absolute Error: 4.7079\n",
"- R2 Score: 0.8497\n",
"===================================\n",
"\n",
"\n"
]
}
],
"source": [
"# Candidate regressors, keyed by the display name used in the report below.\n",
"# Every estimator with a stochastic component gets the same fixed seed as\n",
"# the train/test split (42) so repeated runs produce the same scores.\n",
"models = {\n",
"    \"Linear Regression\": LinearRegression(),\n",
"    \"Lasso\": Lasso(),\n",
"    \"Ridge\": Ridge(),\n",
"    \"K-Neighbors Regressor\": KNeighborsRegressor(),\n",
"    \"Decision Tree\": DecisionTreeRegressor(random_state=42),\n",
"    \"Random Forest Regressor\": RandomForestRegressor(random_state=42),\n",
"    \"XGBRegressor\": XGBRegressor(random_state=42),\n",
"    \"CatBoosting Regressor\": CatBoostRegressor(verbose=False, random_seed=42),\n",
"    \"AdaBoost Regressor\": AdaBoostRegressor(random_state=42),\n",
"}\n",
"model_list = []  # model names, in evaluation order\n",
"r2_list = []  # test-set R2 of each model, aligned with model_list\n",
"\n",
"# Iterate over (name, estimator) pairs directly instead of rebuilding the\n",
"# key and value lists on every pass.\n",
"for model_name, model in models.items():\n",
"    model.fit(X_train, y_train)  # Train model\n",
"\n",
"    # Predict on both splits so train and test scores can be printed together\n",
"    y_train_pred = model.predict(X_train)\n",
"    y_test_pred = model.predict(X_test)\n",
"\n",
"    # Evaluate Train and Test dataset\n",
"    model_train_mae, model_train_rmse, model_train_r2 = evaluate_model(\n",
"        y_train, y_train_pred\n",
"    )\n",
"    model_test_mae, model_test_rmse, model_test_r2 = evaluate_model(\n",
"        y_test, y_test_pred\n",
"    )\n",
"\n",
"    print(model_name)\n",
"    model_list.append(model_name)\n",
"\n",
"    print(\"Model performance for Training set\")\n",
"    print(\"- Root Mean Squared Error: {:.4f}\".format(model_train_rmse))\n",
"    print(\"- Mean Absolute Error: {:.4f}\".format(model_train_mae))\n",
"    print(\"- R2 Score: {:.4f}\".format(model_train_r2))\n",
"\n",
"    print(\"----------------------------------\")\n",
"\n",
"    print(\"Model performance for Test set\")\n",
"    print(\"- Root Mean Squared Error: {:.4f}\".format(model_test_rmse))\n",
"    print(\"- Mean Absolute Error: {:.4f}\".format(model_test_mae))\n",
"    print(\"- R2 Score: {:.4f}\".format(model_test_r2))\n",
"    # Only the test-set R2 is kept for the ranking table\n",
"    r2_list.append(model_test_r2)\n",
"\n",
"    print(\"=\" * 35)\n",
"    print(\"\\n\")"
]
},
{
"cell_type": "markdown",
"id": "06480b5a",
"metadata": {},
"source": [
"### Results"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "e0159e5f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model Name</th>\n",
" <th>R2_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Ridge</td>\n",
" <td>0.880593</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Linear Regression</td>\n",
" <td>0.880433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Random Forest Regressor</td>\n",
" <td>0.853797</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>CatBoosting Regressor</td>\n",
" <td>0.851632</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>AdaBoost Regressor</td>\n",
" <td>0.849691</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>XGBRegressor</td>\n",
" <td>0.827797</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Lasso</td>\n",
" <td>0.825320</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>K-Neighbors Regressor</td>\n",
" <td>0.783813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Decision Tree</td>\n",
" <td>0.741615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model Name R2_Score\n",
"2 Ridge 0.880593\n",
"0 Linear Regression 0.880433\n",
"5 Random Forest Regressor 0.853797\n",
"7 CatBoosting Regressor 0.851632\n",
"8 AdaBoost Regressor 0.849691\n",
"6 XGBRegressor 0.827797\n",
"1 Lasso 0.825320\n",
"3 K-Neighbors Regressor 0.783813\n",
"4 Decision Tree 0.741615"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pair each model name with its test-set R2 and rank the models best-first\n",
"score_rows = list(zip(model_list, r2_list))\n",
"score_table = pd.DataFrame(score_rows, columns=[\"Model Name\", \"R2_Score\"])\n",
"score_table.sort_values(\"R2_Score\", ascending=False)"
]
},
{
"cell_type": "markdown",
"id": "357a7c1c",
"metadata": {},
"source": [
"## Linear Regression"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "9a6ad559",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Accuracy of the model is 88.04\n"
]
}
],
"source": [
"# Refit a plain linear regression and report its test-set R2 as a\n",
"# percentage (the printed \"Accuracy\" figure is r2_score * 100).\n",
"lin_model = LinearRegression(fit_intercept=True).fit(X_train, y_train)\n",
"y_pred = lin_model.predict(X_test)\n",
"score = 100 * r2_score(y_test, y_pred)\n",
"print(\" Accuracy of the model is {:.2f}\".format(score))"
]
},
{
"cell_type": "markdown",
"id": "1d31453e",
"metadata": {},
"source": [
"## Plot y_pred and y_test"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "eb557b0a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Predicted')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Actual vs. predicted values on the test split\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(y_test, y_pred)\n",
"ax.set_xlabel(\"Actual\")\n",
"ax.set_ylabel(\"Predicted\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1e707ec3",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.regplot(x=y_test, y=y_pred, ci=None, color=\"red\");"
]
},
{
"cell_type": "markdown",
"id": "79c2fe28",
"metadata": {},
"source": [
"#### Difference between Actual and Predicted Values"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7c9a8b48",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Actual Value</th>\n",
" <th>Predicted Value</th>\n",
" <th>Difference</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>521</th>\n",
" <td>91</td>\n",
" <td>76.387970</td>\n",
" <td>14.612030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>737</th>\n",
" <td>53</td>\n",
" <td>58.885970</td>\n",
" <td>-5.885970</td>\n",
" </tr>\n",
" <tr>\n",
" <th>740</th>\n",
" <td>80</td>\n",
" <td>76.990265</td>\n",
" <td>3.009735</td>\n",
" </tr>\n",
" <tr>\n",
" <th>660</th>\n",
" <td>74</td>\n",
" <td>76.851804</td>\n",
" <td>-2.851804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>411</th>\n",
" <td>84</td>\n",
" <td>87.627378</td>\n",
" <td>-3.627378</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>408</th>\n",
" <td>52</td>\n",
" <td>43.409149</td>\n",
" <td>8.590851</td>\n",
" </tr>\n",
" <tr>\n",
" <th>332</th>\n",
" <td>62</td>\n",
" <td>62.152214</td>\n",
" <td>-0.152214</td>\n",
" </tr>\n",
" <tr>\n",
" <th>208</th>\n",
" <td>74</td>\n",
" <td>67.888395</td>\n",
" <td>6.111605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613</th>\n",
" <td>65</td>\n",
" <td>67.022287</td>\n",
" <td>-2.022287</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>61</td>\n",
" <td>62.345132</td>\n",
" <td>-1.345132</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>200 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Actual Value Predicted Value Difference\n",
"521 91 76.387970 14.612030\n",
"737 53 58.885970 -5.885970\n",
"740 80 76.990265 3.009735\n",
"660 74 76.851804 -2.851804\n",
"411 84 87.627378 -3.627378\n",
".. ... ... ...\n",
"408 52 43.409149 8.590851\n",
"332 62 62.152214 -0.152214\n",
"208 74 67.888395 6.111605\n",
"613 65 67.022287 -2.022287\n",
"78 61 62.345132 -1.345132\n",
"\n",
"[200 rows x 3 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Test targets next to the linear model's predictions, plus the residual\n",
"# (actual minus predicted) for every test row\n",
"pred_df = pd.DataFrame({\"Actual Value\": y_test, \"Predicted Value\": y_pred})\n",
"pred_df[\"Difference\"] = pred_df[\"Actual Value\"] - pred_df[\"Predicted Value\"]\n",
"pred_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3acf1fbc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|