{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "CC - Module Python pour le Machine Learning Bekombo IABD B3 Soir" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import pycaret as py" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StoreDateDaily_SalesHoliday_FlagTemperatureFuel_PriceCPIUnemployment
012010-05-021643690.90042.312.572211.0963588.106
112010-12-021641957.44138.512.548211.2421708.106
212010-02-191611968.17039.932.514211.2891438.106
312010-02-261409727.59046.632.561211.3196438.106
412010-05-031554806.68046.502.625211.3501438.106
\n", "
" ], "text/plain": [ " Store Date Daily_Sales Holiday_Flag Temperature Fuel_Price \\\n", "0 1 2010-05-02 1643690.90 0 42.31 2.572 \n", "1 1 2010-12-02 1641957.44 1 38.51 2.548 \n", "2 1 2010-02-19 1611968.17 0 39.93 2.514 \n", "3 1 2010-02-26 1409727.59 0 46.63 2.561 \n", "4 1 2010-05-03 1554806.68 0 46.50 2.625 \n", "\n", " CPI Unemployment \n", "0 211.096358 8.106 \n", "1 211.242170 8.106 \n", "2 211.289143 8.106 \n", "3 211.319643 8.106 \n", "4 211.350143 8.106 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv(\"./datasets/Walmart.csv\")\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Store 0\n", "Date 0\n", "Daily_Sales 0\n", "Holiday_Flag 0\n", "Temperature 0\n", "Fuel_Price 0\n", "CPI 0\n", "Unemployment 0\n", "dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isna().sum()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False 6435\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "duplicates = data.duplicated()\n", "duplicates.value_counts()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Store int64\n", "Date object\n", "Daily_Sales float64\n", "Holiday_Flag int64\n", "Temperature float64\n", "Fuel_Price float64\n", "CPI float64\n", "Unemployment float64\n", "dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.dtypes" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 6435 entries, 0 to 6434\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Store 
6435 non-null int64 \n", " 1 Date 6435 non-null object \n", " 2 Daily_Sales 6435 non-null float64\n", " 3 Holiday_Flag 6435 non-null int64 \n", " 4 Temperature 6435 non-null float64\n", " 5 Fuel_Price 6435 non-null float64\n", " 6 CPI 6435 non-null float64\n", " 7 Unemployment 6435 non-null float64\n", "dtypes: float64(5), int64(2), object(1)\n", "memory usage: 402.3+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n", " 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], dtype=int64)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.Store.unique()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "#data['Date'] = pd.to_datetime(data['Date'])\n", "#data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 6435 entries, 0 to 6434\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Store 6435 non-null int64 \n", " 1 Date 6435 non-null object \n", " 2 Daily_Sales 6435 non-null float64\n", " 3 Holiday_Flag 6435 non-null int64 \n", " 4 Temperature 6435 non-null float64\n", " 5 Fuel_Price 6435 non-null float64\n", " 6 CPI 6435 non-null float64\n", " 7 Unemployment 6435 non-null float64\n", "dtypes: float64(5), int64(2), object(1)\n", "memory usage: 402.3+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StoreDateDaily_SalesHoliday_FlagTemperatureFuel_PriceCPIUnemployment
012010-05-021643690.90042.312.572211.0963588.106
112010-12-021641957.44138.512.548211.2421708.106
212010-02-191611968.17039.932.514211.2891438.106
312010-02-261409727.59046.632.561211.3196438.106
412010-05-031554806.68046.502.625211.3501438.106
...........................
6430452012-09-28713173.95064.883.997192.0135588.684
6431452012-05-10733455.07064.893.985192.1704128.667
6432452012-12-10734464.36054.474.000192.3272658.667
6433452012-10-19718125.53056.473.969192.3308548.667
6434452012-10-26760281.43058.853.882192.3088998.667
\n", "

6435 rows × 8 columns

\n", "
" ], "text/plain": [ " Store Date Daily_Sales Holiday_Flag Temperature Fuel_Price \\\n", "0 1 2010-05-02 1643690.90 0 42.31 2.572 \n", "1 1 2010-12-02 1641957.44 1 38.51 2.548 \n", "2 1 2010-02-19 1611968.17 0 39.93 2.514 \n", "3 1 2010-02-26 1409727.59 0 46.63 2.561 \n", "4 1 2010-05-03 1554806.68 0 46.50 2.625 \n", "... ... ... ... ... ... ... \n", "6430 45 2012-09-28 713173.95 0 64.88 3.997 \n", "6431 45 2012-05-10 733455.07 0 64.89 3.985 \n", "6432 45 2012-12-10 734464.36 0 54.47 4.000 \n", "6433 45 2012-10-19 718125.53 0 56.47 3.969 \n", "6434 45 2012-10-26 760281.43 0 58.85 3.882 \n", "\n", " CPI Unemployment \n", "0 211.096358 8.106 \n", "1 211.242170 8.106 \n", "2 211.289143 8.106 \n", "3 211.319643 8.106 \n", "4 211.350143 8.106 \n", "... ... ... \n", "6430 192.013558 8.684 \n", "6431 192.170412 8.667 \n", "6432 192.327265 8.667 \n", "6433 192.330854 8.667 \n", "6434 192.308899 8.667 \n", "\n", "[6435 rows x 8 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StoreDaily_SalesHoliday_FlagTemperatureFuel_PriceCPIUnemployment
count6435.0000006.435000e+036435.0000006435.0000006435.0000006435.0000006435.000000
mean23.0000001.046965e+060.06993060.6637823.358607171.5783947.999151
std12.9881825.643666e+050.25504918.4449330.45902039.3567121.875885
min1.0000002.099862e+050.000000-2.0600002.472000126.0640003.879000
25%12.0000005.533501e+050.00000047.4600002.933000131.7350006.891000
50%23.0000009.607460e+050.00000062.6700003.445000182.6165217.874000
75%34.0000001.420159e+060.00000074.9400003.735000212.7432938.622000
max45.0000003.818686e+061.000000100.1400004.468000227.23280714.313000
\n", "
" ], "text/plain": [ " Store Daily_Sales Holiday_Flag Temperature Fuel_Price \\\n", "count 6435.000000 6.435000e+03 6435.000000 6435.000000 6435.000000 \n", "mean 23.000000 1.046965e+06 0.069930 60.663782 3.358607 \n", "std 12.988182 5.643666e+05 0.255049 18.444933 0.459020 \n", "min 1.000000 2.099862e+05 0.000000 -2.060000 2.472000 \n", "25% 12.000000 5.533501e+05 0.000000 47.460000 2.933000 \n", "50% 23.000000 9.607460e+05 0.000000 62.670000 3.445000 \n", "75% 34.000000 1.420159e+06 0.000000 74.940000 3.735000 \n", "max 45.000000 3.818686e+06 1.000000 100.140000 4.468000 \n", "\n", " CPI Unemployment \n", "count 6435.000000 6435.000000 \n", "mean 171.578394 7.999151 \n", "std 39.356712 1.875885 \n", "min 126.064000 3.879000 \n", "25% 131.735000 6.891000 \n", "50% 182.616521 7.874000 \n", "75% 212.743293 8.622000 \n", "max 227.232807 14.313000 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.describe()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "#df = data.rename(columns={'Weekly_Sales': 'Daily_Sales'})\n", "\n", "#csv_data = df.to_csv(index=False)\n", "#with open('Walmart.csv', 'w') as f:\n", " #f.write(csv_data)\n", "#df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df = data.drop(columns={'Daily_Sales'}, axis=1)\n", "csv_data = df.to_csv(index=False)\n", "with open('./datasets/test_Walmart.csv', 'w') as f:\n", " f.write(csv_data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Outliers checking" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAggAAAGwCAYAAADMjZ3mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAATVElEQVR4nO3dfWyVd9nA8et0fTmFvghTQQLlRSaKBh4LMpE/FoWJRN1g6LInJuLQxWhnQIxGEhnOzDDnH+rMxC0mmy/ZZqaWbRpURIZz2SZjYQFFoskSScpAl7B23Vpeej9/7OE89Lk2h6ztKfD5JE3O+/3rrvWc7+77nJ1SURRFAACcpqbaCwAARh+BAAAkAgEASAQCAJAIBAAgEQgAQCIQAICk9mzvODAwEF1dXdHc3BylUmko1wQADJOiKKKnpycmTZoUNTWvvJ/grAOhq6srpkyZcrZ3BwCq6ODBgzF58uRXvP6sA6G5ubmygZaWlrN9GABgBHV3d8eUKVMqr+Ov5KwD4dRhhZaWFoEAAOeYV3t7gDcpAgCJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgKS22gvg/FIURfT19VV7GbwGRVFEf39/REQ0NDREqVSq8op4LcrlshlyVgQCQ6qvry+WLVtW7WUA/2vr1q3R2NhY7WVwDnKIAQBI7EFg2Dz/X/8dRY1/xc45J49H81P3RkREz9xrIi6qq/KC+E+VBk5E0557qr0MznGevRk2RU2tF5dz3UV1ZngOKqq9AM4LDjEAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQ1FZ7AacriiL6+voiIqJcLkepVKryigBgZI2W18JRtQehr68vli1bFsuWLav8wwGAC8loeS0cVYEAAIwOAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAAJLaai/gdEVRVE739fVVcSWcrUFzO22ewAjyXHpOO31mRRWfR884EPr7+6O/v79yvru7e8gXc/rjr1ixYsgfnxE2cCIi6qu9CrjwDJyonPRcem7r7++PMWPGVGXbZ3yIYdOmTdHa2lr5mTJlynCuCwCoojPeg7B+/fpYt25d5Xx3d/eQR0JDQ0PldGdnZ5TL5SF9fIZfX1/f//0XS82oOoIFF47T/vY8l557Tn8ePf11caSd8TN4Q0P
DsC+0VCpVTpfL5WhsbBzW7THMTpsnMII8l543SlV8HvUpBgAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJDUVnsBpyuXy7F169bKaQC40IyW18JRFQilUikaGxurvQwAqJrR8lroEAMAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAABJbbUXwPmrNHAiimovgv/cyeMvf5pzRmngRLWXwHlAIDBsmvbcU+0l8Bo1P3VvtZcAVIlDDABAYg8CQ6pcLsfWrVurvQxeg6Ioor+/PyIiGhoaolQqVXlFvBblcrnaS+AcJRAYUqVSKRobG6u9DF6jMWPGVHsJQJU5xAAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJLVne8eiKCIioru7e8gWAwAMr1Ov26dex1/JWQdCT09PRERMmTLlbB8CAKiSnp6eaG1tfcXrS8WrJcQrGBgYiK6urmhubo5SqfSqt+/u7o4pU6bEwYMHo6Wl5Ww2yTAxm9HJXEYvsxmdzOXMFEURPT09MWnSpKipeeV3Gpz1HoSampqYPHnyf3y/lpYWgxulzGZ0MpfRy2xGJ3N5df9uz8Ep3qQIACQCAQBIRiwQGhoaYuPGjdHQ0DBSm+QMmc3oZC6jl9mMTuYytM76TYoAwPnLIQYAIBEIAEAiEACARCAAAMmIBcJtt90W06ZNi3K5HJdeemn86U9/GqlNExF/+MMf4sMf/nBMmjQpSqVSbNmyZdD1RVHEDTfcEG9605uisbExlixZEn/729+qs9gLyKZNm+Jd73pXNDc3xxvf+MZYvnx5HDhwYNBt+vr6oqOjIy6++OJoamqKlStXxuHDh6u04gvH5s2bY86cOZX/6c7ChQtj69atlevNZXS4+eabo1Qqxdq1ayuXmc3QGJFA+OlPfxrr1q2LjRs3xpNPPhlz586NpUuXxpEjR0Zi80REb29vzJ07N2677baXvf6WW26JW2+9Nb7//e/H448/HmPHjo2lS5dGX1/fCK/0wrJz587o6OiIxx57LLZt2xbHjx+P97///dHb21u5zec///l48MEH47777oudO3dGV1dXXHXVVVVc9YVh8uTJcfPNN8fu3bvjiSeeiPe9731x5ZVXxp///OeIMJfRYNeuXXH77bfHnDlzBl1uNkOkGAE
LFiwoOjo6KudPnjxZTJo0qdi0adNIbJ7/JyKKzs7OyvmBgYFi4sSJxTe/+c3KZUePHi0aGhqKe+65pworvHAdOXKkiIhi586dRVG8NIe6urrivvvuq9xm//79RUQUjz76aLWWecEaN25c8YMf/MBcRoGenp7ikksuKbZt21ZcdtllxZo1a4qi8DczlIZ9D8KxY8di9+7dsWTJksplNTU1sWTJknj00UeHe/OcgaeffjqeeeaZQTNqbW2NSy+91IxG2HPPPRcREePHj4+IiN27d8fx48cHzeatb31rtLW1mc0IOnnyZNx7773R29sbCxcuNJdRoKOjIz74wQ8OmkGEv5mhdNZf1nSm/vWvf8XJkydjwoQJgy6fMGFC/PWvfx3uzXMGnnnmmYiIl53RqesYfgMDA7F27dpYtGhRvOMd74iIl2ZTX18fr3vd6wbd1mxGxt69e2PhwoXR19cXTU1N0dnZGbNnz449e/aYSxXde++98eSTT8auXbvSdf5mhs6wBwJwZjo6OmLfvn3xxz/+sdpL4X/NmjUr9uzZE88991z87Gc/i1WrVsXOnTurvawL2sGDB2PNmjWxbdu2KJfL1V7OeW3YDzG8/vWvj4suuii9g/Tw4cMxceLE4d48Z+DUHMyoeq6//vr45S9/GTt27Bj0NeoTJ06MY8eOxdGjRwfd3mxGRn19fcycOTPmzZsXmzZtirlz58Z3vvMdc6mi3bt3x5EjR6K9vT1qa2ujtrY2du7cGbfeemvU1tbGhAkTzGaIDHsg1NfXx7x582L79u2VywYGBmL79u2xcOHC4d48Z2D69OkxceLEQTPq7u6Oxx9/3IyGWVEUcf3110dnZ2f8/ve/j+nTpw+6ft68eVFXVzdoNgcOHIh//OMfZlMFAwMD0d/fby5VtHjx4ti7d2/s2bOn8jN//vz42Mc+VjltNkNjRA4xrFu3LlatWhXz58+PBQsWxLe//e3o7e2Na6+9diQ2T0Q8//zz8fe//71y/umnn449e/bE+PHjo62tLdauXRs33XRTXHLJJTF9+vTYsGFDTJo0KZYvX169RV8AOjo64u677477778/mpubK8dIW1tbo7GxMVpbW+OTn/xkrFu3LsaPHx8tLS3xuc99LhYuXBjvfve7q7z689v69etj2bJl0dbWFj09PXH33XfHQw89FL/5zW/MpYqam5sr79E5ZezYsXHxxRdXLjebITJSH5f47ne/W7S1tRX19fXFggULiscee2ykNk1RFDt27CgiIv2sWrWqKIqXPuq4YcOGYsKECUVDQ0OxePHi4sCBA9Vd9AXg5WYSEcWdd95Zuc2LL75YfPazny3GjRtXjBkzplixYkVx6NCh6i36ArF69epi6tSpRX19ffGGN7yhWLx4cfHb3/62cr25jB6nf8yxKMxmqPi6ZwAg8V0MAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEOA89M9//jM+85nPRFtbWzQ0NMTEiRNj6dKl8cgjj0RERKlUii1btlR3kcCoNiJf1gSMrJUrV8axY8fihz/8YcyYMSMOHz4c27dvj2effXZIt3P8+PGoq6sb0scERgd7EOA8c/To0Xj44YfjG9/4Rrz3ve+NqVOnxoIFC2L9+vVxxRVXxLRp0yIiYsWKFVEqlSrnIyI2b94cb37zm6O+vj5mzZoVP/7xjwc9dqlUis2bN8cVV1wRY8eOja9//esREXH//fdHe3t7lMvlmDFjRtx4441x4sSJkfqVgWEgEOA809TUFE1NTbFly5bo7+9P1+/atSsiIu688844dOhQ5XxnZ2esWbMmvvCFL8S+ffvi05/+dFx77bWxY8eOQff/6le/GitWrIi9e/fG6tWr4+GHH46Pf/zjsWbNmvjLX/4St99+e9x1112VeADOTb7NEc5DP//5z+O6666LF198Mdrb2+Oyyy6La665JubMmRMRL+0J6OzsjOXLl1fus2jRonj7298ed9x
xR+Wyq6++Onp7e+NXv/pV5X5r166Nb33rW5XbLFmyJBYvXhzr16+vXPaTn/wkvvSlL0VXV9cw/6bAcLEHAc5DK1eujK6urnjggQfiAx/4QDz00EPR3t4ed9111yveZ//+/bFo0aJBly1atCj2798/6LL58+cPOv/UU0/F1772tcqei6amprjuuuvi0KFD8cILLwzZ7wSMLG9ShPNUuVyOyy+/PC6//PLYsGFDfOpTn4qNGzfGJz7xidf0uGPHjh10/vnnn48bb7wxrrrqqpddA3BusgcBLhCzZ8+O3t7eiIioq6uLkydPDrr+bW97W+VjkKc88sgjMXv27H/7uO3t7XHgwIGYOXNm+qmp8RQD5yp7EOA88+yzz8ZHP/rRWL16dcyZMyeam5vjiSeeiFtuuSWuvPLKiIiYNm1abN++PRYtWhQNDQ0xbty4+OIXvxhXX311vPOd74wlS5bEgw8+GL/4xS/id7/73b/d3g033BAf+tCHoq2tLT7ykY9ETU1NPPXUU7Fv37646aabRuJXBoZDAZxX+vr6ii9/+ctFe3t70draWowZM6aYNWtW8ZWvfKV44YUXiqIoigceeKCYOXNmUVtbW0ydOrVy3+9973vFjBkzirq6uuItb3lL8aMf/WjQY0dE0dnZmbb561//unjPe95TNDY2Fi0tLcWCBQuKO+64Yzh/TWCY+RQDAJA4QAgAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQ/A98H0udADQqQwAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.boxplot(data=data, x=data.Store)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAggAAAGxCAYAAAAH0U5DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAfsklEQVR4nO3de3CV5Z3A8d9JgASBgEC5oygKtV4QqFh0rWBBQKHSG1Yo4oirtroFqTJ2Z3ZQyy7rrtWlHXtVoVVYreu17WLXqsCWy1YDVHStVaugFXC1DhdrEJJ3/3DJJj65kwsJn89MxuS873nO87xvTs435xxiLsuyLAAAKshr6QkAAIcegQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAECiXUOvWFZWFm+++WZ06dIlcrlcY84JAGgiWZbF7t27o1+/fpGXV/3zBA0OhDfffDMGDhzY0KsDAC3o9ddfjwEDBlS7vcGB0KVLl/IbKCoqaugwAEAz2rVrVwwcOLD8cbw6DQ6EAy8rFBUVCQQAaGVqe3uANykCAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQKJdS0+gtcqyLEpKSlp6Go0iy7LYu3dvREQUFBRELpdr4Rk1vcLCwsNinQANJRAaqKSkJCZNmtTS06CBVqxYER07dmzpaQAcsrzEAAAkPIPQCPacelFkea34UJbuiy6/uzciInYP+3JEfvsWnlDTyJXtj86b/rWlpwHQKrTiR7VDR5bXru08qOa3bztr+YispScA0Ip4iQEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEi0a+kJVJRlWZSUlERERGFhYeRyuRaeEcDB8XON1uqQegahpKQkJk2aFJMmTSq/QwG0Zn6u0VodUoEAABwaBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEB
CIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCQAtYu3ZtnH/++TF27NiYMGFCjBs3LsaMGRNjxoyJG264IdauXRsXXnhh3HDDDTF27Ng4//zzY+3atckYF154YaXLD1x25513lm9bu3ZtTJ06NaZOnZqMUd04DdmnrutujnFq2l7TcavueNVn3vU5nhXPU33HaGq5LMuyhlxx165d0bVr19i5c2cUFRU1ymTef//9mDRpUkRErFixIjp27Ngo4zaFinPdPWJmRH77Fp7RQSjdF1023B0RbWAtNamwzkP9+4u2o6qfayUlJTF9+vT485//XO31jjzyyHj33XcrXdajR49YtmxZFBYWRklJSXzlK1+Jt99+O3r27Bn33HNPRET5ZXl5eVFWVhY9evSILMvKb6viGBFR5TgHth1Ql33qornGqWl7bcetquN15513xmWXXVanedf3eB44TxX3bazjVJ26Pn57BgGgmS1btqzGOIiIJA4iIt55551Yvnx5+RjvvPNOpcsrXlZWVla+reJtVRyjunGqmm9t+9RFc41T0/bajltVx+vv/u7v6jzv+h7PiueppnPbEtq1yK1Wo+KTGSUlJS04k9pVml/DnoShubWi7y/ajorfa1mWxRtvvFH+W2tDLFu2LE455ZRYvnx5+c/MLMti2bJl5Z/XZYxzzz03IiIZZ/ny5XHuuefGgAEDIiLijTfeqHWfumiucWraXtV6ly1bFlmW1XjcNm/eXP55TfOuyxo/us9Hx63q3DbkODWGOgfC3r17Y+/eveVf79q1q9EnU3H8z33uc40+fpMp2x8RHVp6FtSmbH/5p63q+4s2o6SkJG677bY6PYhXp7S0NG666aZkjNLS0nqNsXjx4irnkWVZLF68OP7pn/4pIiIWL15c4z65XK7W2zuwf1OPc/PNN1e7/V/+5V+qvI36HLea5l2XNUZEtcf9wL5Vndv6HqfGUudAWLRoUdx4441NOReANu2NN96I4uLigx6nMX5Be/rpp6u8vLS0NJ5++unYunVrtftV3Ofoo4+u9ba2bt3aLOP813/9V7Xbn3nmmVrHr6uq5l2XNUZUf9wP7FvVua3vcWosdQ6Eb37zmzFv3rzyr3ft2hUDBw5s1MkUFBSUf/7QQw816psyGltJScn//xaad0i9UkN1KpynQ/37i7aj4s+KwYMHx8iRIw86EoqKimLPnj3lr183xGmnnRZZlsXGjRsr/Radn58fI0eOjKOOOqp8vw0bNtS4T22OOuqoZhnn9NNPr3b7iBEjIiKSbQ1R1bzrusbTTjstiouLqzx3+fn50alTp+Tc1vc4NZY6P7IVFBRUegBvChWfOiksLGw97zJvxqd8OAit9fuLNiMvLy+uueaamDlzZoNfZsjPz48FCxbE/Pnzk8sj6vaUeX5+fsydOzeyLItZs2ZV2pbL5WLOnDnlP4/nzJlT6z61ObB/U4+Tl5dX7fbq1pufnx9ZltUrtqqad13XWNU+Ffet6tzW9zg1Fv+KAaAZDRgwIL7yla80+PozZsyIkSNHxvTp08sfMHK5XMyYMaPSZbWN0b9//xgwYEAyzvTp06N///6V5lvbPnXRXOPUtL2qbTNmzIgZM2bUeNxOPvnkOs27IcfzgAP7VnVuG3KcGoNAAGhmM2bMiO7du9e4z5FHHplc1qNHj5g+fXr5GD169IiIiJ49e8b06dMrXZaXl1e+reJtHdi34lw+Ok5V861tn7pornFq2l7bcavqeH3rW9+q87zrezwrnqeazm1LEAgAzaywsDCuvfba6NSpU+RyuSgoKIh27f7/Fd8xY8bEddddF717944xY8ZELpeLTp06xTe+8Y3y984UFhbGvHnzonfv3nHNNddEYWFhpctmzJgRvXv3jnnz5sW1114b3bp1i27dusW8efMqvf+mqnGqmm9t+9R
13c0xTk3baztuVR2vA/+ty7zrezwPnKeK+zbWcTpY/pJiA/lLiq2Qv6RIC2hNP9c4PPhLigBAgwkEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEu1aegIVFRYWxooVK8o/B2jt/FyjtTqkAiGXy0XHjh1behoAjcbPNVorLzEAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAACYEAACQEAgCQEAgAQEIgAACJdi09gbYgV7Y/spaexMEo3Vf1521Mrmx/S08BoNUQCI2g86Z/bekpNJouv7u3pacAwCHASwwAQMIzCA1UWFgYK1asaOlpNIosy2Lv3r0REVFQUBC5XK6FZ9T0CgsLW3oKAIc0gdBAuVwuOnbs2NLTaDRHHHFES08BgEOIlxgAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAICEQAAAEgIBAEgIBAAgIRAAgIRAAAASAgEASAgEACAhEACAhEAAABICAQBICAQAICEQAIBEu4ZeMcuyiIjYtWtXo00GAGhaBx63DzyOV6fBgbB79+6IiBg4cGBDhwAAWsju3buja9eu1W7PZbUlRDXKysrizTffjC5dukQul2vwBFuDXbt2xcCBA+P111+PoqKilp5Oszlc1x1x+K79cF13xOG7dus+vNYd8eEzB7t3745+/fpFXl717zRo8DMIeXl5MWDAgIZevVUqKio67L6RIg7fdUccvms/XNcdcfiu3boPLzU9c3CANykCAAmBAAAkBEIdFBQUxIIFC6KgoKClp9KsDtd1Rxy+az9c1x1x+K7dug+vdddHg9+kCAC0XZ5BAAASAgEASAgEACAhEP7P7bffHoMGDYrCwsI4/fTT47e//W21+y5dujRyuVylj8LCwmacbeNYvXp1TJkyJfr16xe5XC4efvjhWq+zcuXKGDFiRBQUFMRxxx0XS5cubfJ5Nrb6rnvlypXJ+c7lcrF9+/bmmXAjWbRoUZx22mnRpUuX6NWrV0ydOjVefPHFWq93//33x8c//vEoLCyMk08+Of793/+9GWbbuBqy9rZwP//+978fp5xySvm/9R89enSsWLGixuu0hfMdUf+1t4Xz3dgEQkTcd999MW/evFiwYEFs2LAhhg0bFhMmTIi33nqr2usUFRXFtm3byj+2bNnSjDNuHO+9914MGzYsbr/99jrt/+qrr8b5558fY8eOjU2bNsXcuXPjsssui1/96ldNPNPGVd91H/Diiy9WOue9evVqohk
2jVWrVsVVV10V69evj8cffzz27dsX5557brz33nvVXmft2rVx0UUXxezZs2Pjxo0xderUmDp1ajz33HPNOPOD15C1R7T++/mAAQPiH//xH6O4uDieeeaZOOecc+KCCy6I559/vsr928r5jqj/2iNa//ludBnZqFGjsquuuqr869LS0qxfv37ZokWLqtx/yZIlWdeuXZtpds0jIrKHHnqoxn3mz5+fnXjiiZUuu/DCC7MJEyY04cyaVl3W/dRTT2URkb377rvNMqfm8tZbb2URka1atarafaZNm5adf/75lS47/fTTsyuuuKKpp9ek6rL2tng/z7IsO/LII7M77rijym1t9XwfUNPa2+r5PhiH/TMIH3zwQRQXF8e4cePKL8vLy4tx48bFunXrqr3enj174uijj46BAwfWWqVtxbp16yodp4iICRMm1Hic2pJTTz01+vbtG+PHj481a9a09HQO2s6dOyMionv37tXu01bPeV3WHtG27uelpaVx7733xnvvvRejR4+ucp+2er7rsvaItnW+G8NhHwhvv/12lJaWRu/evStd3rt372pfYx46dGjcdddd8cgjj8Q999wTZWVlccYZZ8Qbb7zRHFNuMdu3b6/yOO3atSvef//9FppV0+vbt2/84Ac/iAceeCAeeOCBGDhwYIwZMyY2bNjQ0lNrsLKyspg7d26ceeaZcdJJJ1W7X3XnvLW9/6Kiuq69rdzPN2/eHJ07d46CgoK48sor46GHHopPfOITVe7b1s53fdbeVs53Y2rw/6zpcDZ69OhKFXrGGWfECSecED/84Q/jW9/6VgvOjKYwdOjQGDp0aPnXZ5xxRrzyyitx2223xd13392CM2u4q666Kp577rn4zW9+09JTaXZ1XXtbuZ8PHTo0Nm3aFDt37ox/+7d/i1mzZsWqVauqfaBsS+qz9rZyvhvTYR8IPXv2jPz8/NixY0ely3fs2BF9+vSp0xjt27eP4cOHx8svv9wUUzxk9OnTp8rjVFRUFB07dmyhWbWMUaNGtdoH16uvvjp+8YtfxOrVq2v9P7JWd87ret841NRn7R/VWu/nHTp0iOOOOy4iIkaOHBlPP/10LF68OH74wx8m+7a1812ftX9Uaz3fjemwf4mhQ4cOMXLkyHjiiSfKLysrK4snnniixteqKiotLY3NmzdH3759m2qah4TRo0dXOk4REY8//nidj1NbsmnTplZ3vrMsi6uvvjoeeuihePLJJ+OYY46p9Tpt5Zw3ZO0f1Vbu52VlZbF3794qt7WV812dmtb+UW3lfB+Uln6X5KHg3nvvzQoKCrKlS5dm//3f/51dfvnlWbdu3bLt27dnWZZlM2fOzK6//vry/W+88cbsV7/6VfbKK69kxcXF2Ze//OWssLAwe/7551tqCQ2ye/fubOPGjdnGjRuziMhuvfXWbOPGjdmWLVuyLMuy66+/Pps5c2b5/n/84x+zI444IrvuuuuyF154Ibv99tuz/Pz87LHHHmupJTRIfdd92223ZQ8//HD20ksvZZs3b87mzJmT5eXlZb/+9a9bagkN8tWvfjXr2rVrtnLlymzbtm3lH3/5y1/K9/no9/qaNWuydu3aZbfcckv2wgsvZAsWLMjat2+fbd68uSWW0GANWXtbuJ9ff/312apVq7JXX301e/bZZ7Prr78+y+Vy2X/8x39kWdZ2z3eW1X/tbeF8NzaB8H+++93vZkcddVTWoUOHbNSoUdn69evLt5199tnZrFmzyr+eO3du+b69e/fOzjvvvGzDhg0tMOuDc+Cf733048BaZ82alZ199tnJdU499dSsQ4cO2bHHHpstWbKk2ed9sOq77ptvvjkbPHhwVlhYmHXv3j0bM2ZM9uSTT7bM5A9CVWuOiErn8KPf61mWZT/72c+yIUOGZB06dMhOPPHE7Je//GXzTrwRNGTtbeF+fumll2ZHH3101qFDh+xjH/tY9pnPfKb8ATLL2u75zrL6r70tnO/G5v/mCAAkDvv
3IAAAKYEAACQEAgCQEAgAQEIgAAAJgQAAJAQCAJAQCABwCFm9enVMmTIl+vXrF7lcLh5++OF6j5FlWdxyyy0xZMiQKCgoiP79+8ff//3f12sMgQBtxNKlS6Nbt27lX99www1x6qmntth8qtLQH3ZwOHnvvfdi2LBhcfvttzd4jDlz5sQdd9wRt9xyS/z+97+PRx99NEaNGlWvMQQCtLBLLrkkcrlc5HK5aN++ffTu3TvGjx8fd911V5SVldV5nAsvvDD+8Ic/NNk8f/zjH8ewYcOic+fO0a1btxg+fHgsWrSoyW4PDleTJk2KhQsXxuc+97kqt+/duzeuvfba6N+/f3Tq1ClOP/30WLlyZfn2F154Ib7//e/HI488Ep/97GfjmGOOiZEjR8b48ePrNQ+BAIeAiRMnxrZt2+K1116LFStWxNixY2POnDkxefLk2L9/f53G6NixY/Tq1atJ5nfXXXfF3Llz4+tf/3ps2rQp1qxZE/Pnz489e/Y0ye0B1bv66qtj3bp1ce+998azzz4bX/rSl2LixInx0ksvRUTEz3/+8zj22GPjF7/4RRxzzDExaNCguOyyy+LPf/5zvW5HIMAhoKCgIPr06RP9+/ePESNGxN/+7d/GI488EitWrIilS5dGRMStt94aJ598cnTq1CkGDhwYX/va1yo9QH/0JYaKVq9eHe3bt4/t27dXunzu3Llx1lln1Tq/Rx99NKZNmxazZ8+O4447Lk488cS46KKLKr2m+fTTT8f48eOjZ8+e0bVr1zj77LNjw4YNNY77+uuvx7Rp06Jbt27RvXv3uOCCC+K1114r375y5coYNWpUdOrUKbp16xZnnnlmbNmypdb5Qlu1devWWLJkSdx///1x1llnxeDBg+Paa6+Nv/qrv4olS5ZERMQf//jH2LJlS9x///3x05/+NJYuXRrFxcXxxS9+sV63JRDgEHXOOefEsGHD4sEHH4yIiLy8vPjOd74Tzz//fPzkJz+JJ598MubPn1+nsT796U/HscceG3fffXf5Zfv27Ytly5bFpZdeWuv1+/TpE+vXr6/xwXn37t0xa9as+M1vfhPr16+P448/Ps4777zYvXt3lfvv27cvJkyYEF26dIn//M//jDVr1kTnzp1j4sSJ8cEHH8T+/ftj6tSpcfbZZ8ezzz4b69ati8svvzxyuVyd1gxt0ebNm6O0tDSGDBkSnTt3Lv9YtWpVvPLKKxERUVZWFnv37o2f/vSncdZZZ8WYMWPizjvvjKeeeipefPHFOt9Wu6ZaBHDwPv7xj8ezzz4bER/+tn/AoEGDYuHChXHllVfG9773vTqNNXv27FiyZElcd911EfHh05AlJSUxbdq0Wq+7YMGC+PznPx+DBg2KIUOGxOjRo+O8886LL37xi5GX9+HvGeecc06l6/zoRz+Kbt26xapVq2Ly5MnJmPfdd1+UlZXFHXfcUf6gv2TJkujWrVusXLkyPvnJT8bOnTtj8uTJMXjw4IiIOOGEE+q0Vmir9uzZE/n5+VFcXBz5+fmVtnXu3DkiIvr27Rvt2rWLIUOGlG87cN/ZunVrDB06tE635RkEOIRlWVb+4PnrX/86PvOZz0T//v2jS5cuMXPmzHjnnXfiL3/5S53GuuSSS+Lll1+O9evXR8SHL0lMmzYtOnXqVOt1+/btG+vWrYvNmzfHnDlzYv/+/TFr1qyYOHFi+Rspd+zYEX/9138dxx9/fHTt2jWKiopiz549sXXr1irH/N3vfhcvv/xydOnSpfy3oO7du0dJSUm88sor0b1797jkkktiwoQJMWXKlFi8eHFs27atTmuFtmr48OFRWloab731Vhx33HGVPvr06RMREWeeeWbs37+//BmFiCh/A/PRRx9d59sSCHAIe+GFF+KYY46J1157LSZPnhynnHJKPPDAA1FcXFz+T6A++OCDOo3Vq1evmDJlSixZsiR27NgRK1asqNPLCxWddNJJ8bWvfS3uueeeePzxx+Pxxx+PVat
WRUTErFmzYtOmTbF48eJYu3ZtbNq0KXr06FHt/Pbs2RMjR46MTZs2Vfr4wx/+ENOnT4+ID59RWLduXZxxxhlx3333xZAhQ8oDB9qqPXv2lN8fIiJeffXV2LRpU2zdujWGDBkSM2bMiIsvvjgefPDBePXVV+O3v/1tLFq0KH75y19GRMS4ceNixIgRcemll8bGjRujuLg4rrjiihg/fnylZxVq4yUGOEQ9+eSTsXnz5rjmmmuiuLg4ysrK4tvf/nb5U/o/+9nP6j3mZZddFhdddFEMGDAgBg8eHGeeeWaD5/eJT3wiIj78N9sREWvWrInvfe97cd5550XEh29AfPvtt6u9/ogRI+K+++6LXr16RVFRUbX7DR8+PIYPHx7f/OY3Y/To0bF8+fL41Kc+1eB5w6HumWeeibFjx5Z/PW/evIj4MMKXLl0aS5YsiYULF8Y3vvGN+NOf/hQ9e/aMT33qU+Uv5eXl5cXPf/7z+Ju/+Zv49Kc/HZ06dYpJkybFt7/97XrNQyDAIWDv3r2xffv2KC0tjR07dsRjjz0WixYtismTJ8fFF18czz33XOzbty+++93vxpQpU2LNmjXxgx/8oN63M2HChCgqKoqFCxfGTTfdVOfrffWrX41+/frFOeecEwMGDIht27bFwoUL42Mf+1iMHj06IiKOP/74uPvuu+OTn/xk7Nq1K6677rro2LFjtWPOmDEj/vmf/zkuuOCCuOmmm2LAgAGxZcuWePDBB2P+/Pmxb9+++NGPfhSf/exno1+/fvHiiy/GSy+9FBdffHG91w2tyZgxYyLLsmq3t2/fPm688ca48cYbq92nX79+8cADDxzUPLzEAIeAxx57LPr27RuDBg2KiRMnxlNPPRXf+c534pFHHon8/PwYNmxY3HrrrXHzzTfHSSedFMuWLWvQHynKy8uLSy65JEpLS+v1QDtu3LhYv359fOlLX4ohQ4bEF77whSgsLIwnnngievToERERd955Z7z77rsxYsSImDlzZnz961+v8e8yHHHEEbF69eo46qij4vOf/3yccMIJMXv27CgpKYmioqI44ogj4ve//3184QtfiCFDhsTll18eV111VVxxxRX1XjdQf7mspkwB2pzZs2fH//zP/8Sjjz7a0lMBDmFeYoDDxM6dO2Pz5s2xfPlycQDUyksMcJi44IIL4txzz40rr7wy+ZvskyZNqvRHVyp+/MM//EMLzRhoSV5iAOJPf/pTvP/++1Vu6969e3Tv3r2ZZwS0NIEAACS8xAAAJAQCAJAQCABAQiAAAAmBAAAkBAIAkBAIAEBCIAAAif8F6PlYKrnk74kAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.boxplot(data=data, x=data.Daily_Sales)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Identifier les outliers avec la méthode IQR\n", "Q1 = data['Daily_Sales'].quantile(0.25)\n", "Q3 = data['Daily_Sales'].quantile(0.75)\n", "IQR = Q3 - Q1\n", "IQR\n", "\n", "k = 1.5 # Facteur de seuil pour définir les outliers\n", "lower_limit = Q1 - k * IQR\n", "upper_limit = Q3 + k * IQR\n", "\n", "\n", "outliers = data[(data['Daily_Sales'] < lower_limit) | (data['Daily_Sales'] > upper_limit)]\n", "len(outliers)\n", "\n", "# Diviser outliers en deux\n", "upper_limit2 = 2800000.00\n", "outliers_contigus = data[(data['Daily_Sales'] > upper_limit) & (data['Daily_Sales'] <= upper_limit2)]\n", "outliers_non_contigus = data[data['Daily_Sales'] > upper_limit2]\n", "\n", "\n", "data.loc[data['Daily_Sales'] > upper_limit2, 'Daily_Sales'] = upper_limit2\n", "\n", "sns.boxplot(data=data,x=data.Daily_Sales)\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAggAAAGxCAYAAAAH0U5DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWp0lEQVR4nO3de5DVdf348ddy2yVhAVMQcpObF0SdvKQDpGZhhEZqNVg0CKWiI/2Rpmai4qh5C83GyFJLzNEYzctYoqYYY5Km0dIwQSiiqQGWjQqkKLDv7x/+2J/rC5Td2LO7+HjMnJlz+XzOvs7bHc7TzzlnT1UppQQAwLt0ausBAID2RyAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEDSpaU7NjQ0xIoVK6Jnz55RVVW1LWcCAFpJKSXWrFkTAwYMiE6dtnycoMWBsGLFiqirq2vp7gBAG3rxxRdj11133eLtLQ6Enj17Nv6A2tralt4NAFBBq1evjrq6usbn8S1pcSBselmhtrZWIABAB/NBbw/wJkUAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQdGnrAd6tlBLr1q2LiIiampqoqqpq44kA4MOpXR1BWLduXYwdOzbGjh3bGAoAQOW1u0DY3HkAoLLaVSAAAO2DQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABA0q4CoaGhYbPnAYDKaleBsHr16s2eBwAqq10FAgDQPggEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAABJl7YeAABo6tOf/nTj+Xnz5rXJDI4gAEA78u442NzlShEIAEAiEACgndjS0YK2OIqw1YHw1ltvxerVq5ucAIBt44MioNKRsNWBcNlll0WvXr0aT3V1da05FwDQhrY6EL73ve/F66+/3nh68cUXW3MuAKANbXUgVFdXR21tbZMTALBtfNDHGSv9cUdvUgSAdmJLEdAWfwtBIAAAiUAAgHbkvUcL2uovKfpTywDQzrRVFLybIwgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQ
CAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAAJJ2FQi1tbWbPQ8AVFa7CoROnTpt9jwAUFmehQGARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIGlXgVBTU7PZ8wBAZXVp6wHeraamJu6///7G8wBA22hXgVBVVRXdu3dv6zEA4EOvXb3EAAC0DwIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJF1aumMpJSIiVq9evc2GAQBa16bn7U3P41vS4kBYs2ZNRETU1dW19C4AgDayZs2a6NWr1xZvryoflBBb0NDQECtWrIiePXtGVVVViwd8r9WrV0ddXV28+OKLUVtbu83ul8xaV4Z1rgzrXBnWuTJac51LKbFmzZoYMGBAdOq05XcatPgIQqdOnWLXXXdt6e4fqLa21i9fhVjryrDOlWGdK8M6V0ZrrfP7HTnYxJsUAYBEIAAASbsLhOrq6pg+fXpUV1e39SjbPWtdGda5MqxzZVjnymgP69ziNykCANuvdncEAQBoewIBAEgEAgCQtEkgzJw5MwYOHBg1NTVxyCGHxJNPPvm+299xxx2x1157RU1NTey7774xZ86cCk3a8TVnrW+44YY49NBDo0+fPtGnT58YPXr0B/634R3N/Z3eZPbs2VFVVRXHHnts6w64nWjuOr/22msxderU6N+/f1RXV8cee+zh34+t0Nx1vuaaa2LPPfeM7t27R11dXZx++umxbt26Ck3bMT366KMxbty4GDBgQFRVVcU999zzgfvMmzcvDjjggKiuro6hQ4fGrFmzWnfIUmGzZ88u3bp1K7/4xS/K3/72t3LyySeX3r17l5dffnmz28+fP7907ty5XHnllWXx4sXlvPPOK127di2LFi2q8OQdT3PXesKECWXmzJmlvr6+LFmypEyePLn06tWrvPTSSxWevGNp7jpv8txzz5WPfexj5dBDDy3HHHNMZYbtwJq7zm+99VY56KCDylFHHVUee+yx8txzz5V58+aVhQsXVnjyjqW563zrrbeW6urqcuutt5bnnnuuPPjgg6V///7l9NNPr/DkHcucOXPKtGnTyl133VUiotx9993vu/3y5cvLRz7ykXLGGWeUxYsXl2uvvbZ07ty5PPDAA602Y8UD4eCDDy5Tp05tvLxx48YyYMCActlll212+/Hjx5ejjz66yXWHHHJIOeWUU1p1zu1Bc9f6vTZs2FB69uxZbr755tYacbvQknXesGFDGTlyZLnxxhvLpEmTBMJWaO46X3fddWXw4MHl7bffrtSI24XmrvPUqVPLZz7zmSbXnXHGGWXUqFGtOuf2ZGsC4eyzzy7Dhw9vct3xxx9
fxowZ02pzVfQlhrfffjsWLFgQo0ePbryuU6dOMXr06Hj88cc3u8/jjz/eZPuIiDFjxmxxe97RkrV+rzfeeCPWr18fO+64Y2uN2eG1dJ0vuuii6Nu3b5x44omVGLPDa8k633vvvTFixIiYOnVq9OvXL/bZZ5+49NJLY+PGjZUau8NpyTqPHDkyFixY0PgyxPLly2POnDlx1FFHVWTmD4u2eC5s8XcxtMQrr7wSGzdujH79+jW5vl+/fvH3v/99s/usWrVqs9uvWrWq1ebcHrRkrd/ru9/9bgwYMCD9UvL/tWSdH3vssfj5z38eCxcurMCE24eWrPPy5cvjkUceia9//esxZ86cWLZsWZx22mmxfv36mD59eiXG7nBass4TJkyIV155JT71qU9FKSU2bNgQp556apx77rmVGPlDY0vPhatXr44333wzunfvvs1/pk8xsFmXX355zJ49O+6+++6oqalp63G2G2vWrImJEyfGDTfcEDvttFNbj7Nda2hoiL59+8b1118fBx54YBx//PExbdq0+OlPf9rWo21X5s2bF5deemn85Cc/ib/85S9x1113xX333RcXX3xxW4/G/6iiRxB22mmn6Ny5c7z88stNrn/55Zdjl1122ew+u+yyS7O25x0tWetNZsyYEZdffnk8/PDDsd9++7XmmB1ec9f52Wefjeeffz7GjRvXeF1DQ0NERHTp0iWWLl0aQ4YMad2hO6CW/D73798/unbtGp07d268btiwYbFq1ap4++23o1u3bq06c0fUknU+//zzY+LEiXHSSSdFRMS+++4b//3vf2PKlCkxbdq09/06Ybbelp4La2trW+XoQUSFjyB069YtDjzwwJg7d27jdQ0NDTF37twYMWLEZvcZMWJEk+0jIh566KEtbs87WrLWERFXXnllXHzxxfHAAw/EQQcdVIlRO7TmrvNee+0VixYtioULFzaevvjFL8YRRxwRCxcujLq6ukqO32G05Pd51KhRsWzZssYAi4h4+umno3///uJgC1qyzm+88UaKgE1RVvwl/22mTZ4LW+3tj1swe/bsUl1dXWbNmlUWL15cpkyZUnr37l1WrVpVSill4sSJ5Zxzzmncfv78+aVLly5lxowZZcmSJWX69Ok+5riVmrvWl19+eenWrVv59a9/XVauXNl4WrNmTVs9hA6huev8Xj7FsHWau84vvPBC6dmzZ/nWt75Vli5dWn7729+Wvn37lksuuaStHkKH0Nx1nj59eunZs2f51a9+VZYvX15+97vflSFDhpTx48e31UPoENasWVPq6+tLfX19iYhy9dVXl/r6+vKPf/yjlFLKOeecUyZOnNi4/aaPOZ511lllyZIlZebMmdvfxxxLKeXaa68tH//4x0u3bt3KwQcfXJ544onG2w4//PAyadKkJtvffvvtZY899ijdunUrw4cPL/fdd1+FJ+64mrPWu+22W4mIdJo+fXrlB+9gmvs7/W4CYes1d53/+Mc/lkMOOaRUV1eXwYMHl+9///tlw4YNFZ6642nOOq9fv75ceOGFZciQIaWmpqbU1dWV0047rbz66quVH7wD+f3vf7/Zf283re2kSZPK4Ycfnvb5xCc+Ubp161YGDx5cbrrpplad0bc5AgCJd48AAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCBABzRw4MC45pprGi9XVVXFPffcs8Xtn3/++aiqqmo3XzE9a9as6N27d1uPAbwPgQAVNHny5Dj22GPT9fPmzYuqqqp47bXXWnS/K1eujLFjx/5vw21Dmx7Pe0/nnXdeW48GbKWKft0z0Dra69efL126NGpraxsv9+jRow2nAZrDEQRoh+68884YPnx4VFdXx8CBA+Oqq6563+3f+xLDk08+Gfvvv3/U1NTEQQcdFPX19U2237hxY5x44okxaNCg6N69e+y5557xox/9qPH2Rx99NLp27RqrVq1qst+3v/3tOPTQQ7f6cfTt2zd22WWXxtO
WAuHZZ5+NY445Jvr16xc9evSIT37yk/Hwww832WblypVx9NFHR/fu3WPQoEFx2223pZdagG1HIEA7s2DBghg/fnx89atfjUWLFsWFF14Y559/fsyaNWur9l+7dm184QtfiL333jsWLFgQF154YZx55plNtmloaIhdd9017rjjjli8eHFccMEFce6558btt98eERGHHXZYDB48OG655ZbGfdavXx+33nprfPOb39xmj/XdMx911FExd+7cqK+vj89//vMxbty4eOGFFxq3OeGEE2LFihUxb968uPPOO+P666+Pf/3rX9t8FuD/adXvigSamDRpUuncuXPZYYcdmpxqampKRJRXX321TJgwoRx55JFN9jvrrLPK3nvv3Xh5t912Kz/84Q8bL0dEufvuu0sppfzsZz8rH/3oR8ubb77ZePt1111XIqLU19dvcbapU6eWL3/5y42Xr7jiijJs2LDGy3feeWfp0aNHWbt27Qc+zk1fZfvex/nKK6+UUkq56aabSq9evd73PoYPH16uvfbaUkopS5YsKRFRnnrqqcbbn3nmmRIRTdYB2HYcQYAKO+KII2LhwoVNTjfeeGPj7UuWLIlRo0Y12WfUqFHxzDPPxMaNGz/w/pcsWRL77bdf1NTUNF43YsSItN3MmTPjwAMPjJ133jl69OgR119/fZP/Y588eXIsW7YsnnjiiYh455MH48ePjx122GGrH+sf/vCHJo+zT58+m91u7dq1ceaZZ8awYcOid+/e0aNHj1iyZEnjPEuXLo0uXbrEAQcc0LjP0KFDt3h/wP/OmxShwnbYYYcYOnRok+teeumlis4we/bsOPPMM+Oqq66KESNGRM+ePeMHP/hB/OlPf2rcpm/fvjFu3Li46aabYtCgQXH//ffHvHnzmvVzBg0atFUfZzzzzDPjoYceihkzZsTQoUOje/fu8ZWvfCXefvvtZj4yYFsRCNDODBs2LObPn9/kuvnz58cee+wRnTt33qr9b7nllli3bl3jUYRNRwHefX8jR46M0047rfG6Z599Nt3XSSedFF/72tdi1113jSFDhqQjG9vK/PnzY/LkyXHcccdFxDtHFJ5//vnG2/fcc8/YsGFD1NfXx4EHHhgREcuWLYtXX321VeYBvEkR2p3vfOc7MXfu3Lj44ovj6aefjptvvjl+/OMfpzcabsmECROiqqoqTj755Fi8eHHMmTMnZsyY0WSb3XffPf785z/Hgw8+GE8//XScf/758dRTT6X7GjNmTNTW1sYll1wS3/jGN7bJ49uc3XffPe66665YuHBh/PWvf40JEyZEQ0ND4+177bVXjB49OqZMmRJPPvlk1NfXx5QpU6J79+5RVVXVanPBh5lAgHbmgAMOiNtvvz1mz54d++yzT1xwwQVx0UUXxeTJk7dq/x49esRvfvObWLRoUey///4xbdq0uOKKK5psc8opp8SXvvSlOP744+OQQw6J//znP02OJmzSqVOnmDx5cmzcuDFOOOGEbfHwNuvqq6+OPn36xMiRI2PcuHExZsyYJu83iIj45S9/Gf369YvDDjssjjvuuDj55JOjZ8+eTd5rAWw7VaWU0tZDAO3XiSeeGP/+97/j3nvvbetRmnjppZeirq4uHn744fjsZz/b1uPAdsd7EIDNev3112PRokVx2223tYs4eOSRR2Lt2rWx7777xsqVK+Pss8+OgQMHxmGHHdbWo8F2yUsMwGYdc8wx8bnPfS5OPfXUOPLII5vcNnbs2OjRo8dmT5deemmrzLN+/fo499xzY/jw4XHcccfFzjvvHPPmzYuuXbu2ys+DDzsvMQDN9s9//jPefPPNzd624447xo477ljhiYBtTSAAAImXGACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEDyf25WjHcAybg9AAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Box plot of the Holiday_Flag column.\n", "sns.boxplot(data=data, x='Holiday_Flag')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Box plot of the Temperature column.\n", "sns.boxplot(data=data, x='Temperature')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Nombre d'observations considérées comme des outliers à moins 25% non-contigus : 1\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Cap low Temperature outliers with the IQR rule.\n", "# Only the lower fence is applied: values under Q1 - k * IQR are raised to that fence.\n", "temperature = data['Temperature']\n", "q1, q3 = temperature.quantile([0.25, 0.75])\n", "iqr = q3 - q1\n", "\n", "k = 1.5 # threshold factor that defines an outlier\n", "lower_limit = q1 - k * iqr\n", "\n", "# Count the readings below the fixed threshold of 0 before anything is capped.\n", "extreme_threshold = 0\n", "n_extreme = int((temperature < extreme_threshold).sum())\n", "print(\"Nombre d'observations considérées comme des outliers à moins 25% non-contigus : \", n_extreme)\n", "\n", "# clip(lower=...) replaces every value under the fence with the fence itself.\n", "data['Temperature'] = temperature.clip(lower=lower_limit)\n", "\n", "sns.boxplot(data=data, x='Temperature')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.boxplot(data=data,x=data.Fuel_Price)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAggAAAGwCAYAAADMjZ3mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAUO0lEQVR4nO3dfWzdddn48euUbm23tZsIW1P3gARkbiDInEvRSDRkD1mmDv/gJkhESQzSQYBlMWhUgjFABHwgE2JiwITkvuOiIwPddHFjg2xDhXHDcPKgwpBtLLqMdkj3QD+/P/ztuHJtdO6mPR3n9UqadOd8z+l1rox933xPm1ZKKSUAAA7TUOsBAIDhRyAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIGk83gf29fXF9u3bo7W1NSqVyjs5EwAwSEop0dPTEx0dHdHQcPTrBMcdCNu3b49JkyYd78MBgBp6+eWXY+LEiUe9/7gDobW1tfoF2trajvdpAIAh1N3dHZMmTaqex4/muAPh0NsKbW1tAgEATjADfXuAb1IEABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAASWOtB6iFUkr09vbWegyoa6WU2LdvX0RENDU1RaVSqfFEDEfNzc3+btRIXQZCb29vzJs3r9ZjADCAlStXRktLS63HqEveYgAAkrq8gnC4veddGqWh7tcAQ+/NA9H6v/8TERE95/5XxEkjajwQw0Wl72CMefK/az1G3av7M2NpaPQPE9TaSSP8d0hVqfUARIS3GACAIxAIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAACSxloPcLhSSvT29kZERHNzc1QqlRpPBABDa7icC4fVFYTe3t6YN29ezJs3r7ocAKgnw+VcOKwCAQAYHgQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgE
ASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkjbUe4HCllOrnvb29g/Z1+j33YV8TgGFgiM4Fw9Xhr7nU8Bx1zIGwb9++2LdvX/XP3d3d7/gwhz//woUL3/HnP6K+gxExcmi+FgAD6ztY/XTIzgXD1L59+2LUqFE1+drH/BbDLbfcEmPHjq1+TJo0aTDnAgBq6JivINx4441xww03VP/c3d39jkdCU1NT9fPly5dHc3PzO/r8h/T29v67ShuG1bssABz27/JgnguGq8PPUYefF4faMZ8dm5qaBn3QSqVS/by5uTlaWloG9ev9/y86+F8DgGNXi3PBMFWp4TnKTzEAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACApLHWAxyuubk5Vq5cWf0cAOrNcDkXDqtAqFQq0dLSUusxAKBmhsu50FsMAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJAIBAEgEAgCQCAQAIBEIAEAiEACARCAAAIlAAAASgQAAJI21HqDWKn0Ho9R6CKhHbx448ufUvUrfwVqPQAiEGPPkf9d6BKh7rf/7P7UeAXgLbzEAAEldXkFobm6OlStX1noMqGullNi3b19ERDQ1NUWlUqnxRAxHzc3NtR6hbtVlIFQqlWhpaan1GFD3Ro0aVesRgKPwFgMAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkAgEACARCABAIhAAgEQgAACJQAAAEoEAACQCAQBIBAIAkDQe7wNLKRER0d3d/Y4NAwAMrkPn7UPn8aM57kDo6emJiIhJkyYd71MAADXS09MTY8eOPer9lTJQQhxFX19fbN++PVpbW6NSqRz3gPxbd3d3TJo0KV5++eVoa2ur9TjvSnY8+Ox48Nnx4Hs377iUEj09PdHR0RENDUf/ToPjvoLQ0NAQEydOPN6H8zba2tredX8hhxs7Hnx2PPjsePC9W3f8dlcODvFNigBAIhAAgEQgDCNNTU3xrW99K5qammo9yruWHQ8+Ox58djz47Pj/8E2KAMC7lysIAEAiEACARCAAAIlAAAASgTDI1q9fHwsWLIiOjo6oVCrxwAMPHPXYq666KiqVSnz/+9/vd/vu3bvjsssui7a2thg3blxceeWVsXfv3sEd/ARyLDveunVrfPrTn46xY8fG6NGjY+bMmbFt27bq/b29vdHV1RXvfe97Y8y
YMfG5z30uXn311SF8FcPbQDveu3dvLFq0KCZOnBgtLS0xbdq0uOeee/odY8dv75ZbbomZM2dGa2trjB8/Pj772c/Gs88+2++YY9nhtm3bYv78+TFq1KgYP358LFmyJA4ePDiUL2XYGmjHu3fvjmuuuSbOOuusaGlpicmTJ8e1114br732Wr/nqZcdC4RB9vrrr8e5554bS5cufdvjli9fHps2bYqOjo5032WXXRbPPPNMrF69Oh566KFYv359fPnLXx6skU84A+34z3/+c3z84x+PqVOnxsMPPxxPPfVUfOMb34jm5ubqMddff308+OCDsWzZsli3bl1s3749Lr744qF6CcPeQDu+4YYbYtWqVXH//ffH1q1b47rrrotFixbFihUrqsfY8dtbt25ddHV1xaZNm2L16tVx4MCBmD17drz++uvVYwba4Ztvvhnz58+P/fv3x4YNG+KnP/1p3HffffHNb36zFi9p2Blox9u3b4/t27fH7bffHlu2bIn77rsvVq1aFVdeeWX1Oepqx4UhExFl+fLl6fa//e1v5X3ve1/ZsmVLmTJlSvne975Xve+Pf/xjiYjy+9//vnrbypUrS6VSKa+88soQTH1iOdKOL7nkkvL5z3/+qI/Zs2dPGTFiRFm2bFn1tq1bt5aIKBs3bhysUU9YR9rx9OnTy80339zvtvPPP798/etfL6XY8fHYtWtXiYiybt26Usqx7fBXv/pVaWhoKDt37qwec/fdd5e2trayb9++oX0BJ4C37vhIfvazn5WRI0eWAwcOlFLqa8euINRYX19fXH755bFkyZKYPn16un/jxo0xbty4+MhHPlK97aKLLoqGhoZ47LHHhnLUE1JfX1/88pe/jA984AMxZ86cGD9+fMyaNavfJfLHH388Dhw4EBdddFH1tqlTp8bkyZNj48aNNZj6xHPBBRfEihUr4pVXXolSSqxduzaee+65mD17dkTY8fE4dFn75JNPjohj2+HGjRvjnHPOiQkTJlSPmTNnTnR3d8czzzwzhNOfGN6646Md09bWFo2N//rVRfW0Y4FQY7fddls0NjbGtddee8T7d+7cGePHj+93W2NjY5x88smxc+fOoRjxhLZr167Yu3dv3HrrrTF37tz4zW9+EwsXLoyLL7441q1bFxH/2vHIkSNj3Lhx/R47YcIEOz5Gd911V0ybNi0mTpwYI0eOjLlz58bSpUvjE5/4RETY8X+qr68vrrvuuvjYxz4WZ599dkQc2w537tzZ78R16P5D9/FvR9rxW/3973+Pb3/72/3e0q2nHR/3b3Pk/+7xxx+PH/zgB/HEE0/4ldmDpK+vLyIiPvOZz8T1118fERHnnXdebNiwIe6555648MILazneu8Zdd90VmzZtihUrVsSUKVNi/fr10dXVFR0dHf3+j5dj09XVFVu2bIlHH3201qO8aw204+7u7pg/f35MmzYtbrrppqEdbphwBaGGHnnkkdi1a1dMnjw5Ghsbo7GxMV566aVYvHhxnHbaaRER0d7eHrt27er3uIMHD8bu3bujvb29BlOfWE455ZRobGyMadOm9bv9gx/8YPWnGNrb22P//v2xZ8+efse8+uqrdnwM3njjjfja174Wd955ZyxYsCA+9KEPxaJFi+KSSy6J22+/PSLs+D+xaNGieOihh2Lt2rUxceLE6u3HssP29vb0Uw2H/mzP/3a0HR/S09MTc+fOjdbW1li+fHmMGDGiel897Vgg1NDll18eTz31VDz55JPVj46OjliyZEn8+te/joiIzs7O2LNnTzz++OPVx61Zsyb6+vpi1qxZtRr9hDFy5MiYOXNm+nGx5557LqZMmRIRETNmzIgRI0bEb3/72+r9zz77bGzbti06OzuHdN4T0YEDB+LAgQPR0ND/n5OTTjqpegXHjgdWSolFixbF8uXLY82aNfH+97+/3/3HssPOzs54+umn+/1PxerVq6OtrS1Fcj0aaMcR/7pyMHv27Bg5cmSsWLGi308
7RdTZjmv8TZLvej09PWXz5s1l8+bNJSLKnXfeWTZv3lxeeumlIx7/1p9iKKWUuXPnlg9/+MPlscceK48++mg588wzy6WXXjoE058YBtrxL37xizJixIjy4x//uDz//PPlrrvuKieddFJ55JFHqs9x1VVXlcmTJ5c1a9aUP/zhD6Wzs7N0dnbW6iUNOwPt+MILLyzTp08va9euLX/5y1/KvffeW5qbm8uPfvSj6nPY8dv7yle+UsaOHVsefvjhsmPHjurHP//5z+oxA+3w4MGD5eyzzy6zZ88uTz75ZFm1alU59dRTy4033liLlzTsDLTj1157rcyaNaucc8455YUXXuh3zMGDB0sp9bVjgTDI1q5dWyIifXzhC1844vFHCoR//OMf5dJLLy1jxowpbW1t5Ytf/GLp6ekZ/OFPEMey45/85CfljDPOKM3NzeXcc88tDzzwQL/neOONN8rVV19d3vOe95RRo0aVhQsXlh07dgzxKxm+Btrxjh07yhVXXFE6OjpKc3NzOeuss8odd9xR+vr6qs9hx2/vSPuNiHLvvfdWjzmWHb744otl3rx5paWlpZxyyill8eLF1R/Rq3cD7fhof88jovz1r3+tPk+97NivewYAEt+DAAAkAgEASAQCAJAIBAAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgHq1M6dO+Oaa66J008/PZqammLSpEmxYMGC6i8DOu2006JSqUSlUonRo0fH+eefH8uWLas+/qabborzzjuvRtMDg00gQB168cUXY8aMGbFmzZr47ne/G08//XSsWrUqPvnJT0ZXV1f1uJtvvjl27NgRmzdvjpkzZ8Yll1wSGzZsqOHkwFBprPUAwNC7+uqro1KpxO9+97sYPXp09fbp06fHl770peqfW1tbo729Pdrb22Pp0qVx//33x4MPPhgXXHBBLcYGhpArCFBndu/eHatWrYqurq5+cXDIuHHjjvi4xsbGGDFiROzfv3+QJwSGA1cQoM688MILUUqJqVOnHvNj9u/fH3fccUe89tpr8alPfWoQpwOGC1cQoM78J7/h/atf/WqMGTMmRo0aFbfddlvceuutMX/+/EGcDhguXEGAOnPmmWdGpVKJP/3pTwMeu2TJkrjiiitizJgxMWHChKhUKkMwITAcuIIAdebkk0+OOXPmxNKlS+P1119P9+/Zs6f6+SmnnBJnnHFGtLe3iwOoMwIB6tDSpUvjzTffjI9+9KPx85//PJ5//vnYunVr/PCHP4zOzs5ajwcMA95igDp0+umnxxNPPBHf+c53YvHixbFjx4449dRTY8aMGXH33XfXejxgGKiU/+Q7lgCAuuAtBgAgEQgAQCIQAIBEIAAAiUAAABKBAAAkAgEASAQCAJAIBAAgEQgAQCIQAIDk/wF2odeJRdP4tAAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Box plot of the CPI column.\n", "sns.boxplot(data=data, x='CPI')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Box plot of the Unemployment column.\n", "sns.boxplot(data=data, x='Unemployment')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Nombre d'observations considérées comme des outliers à plus de 75% non-contigus : 156 \n", "\n", "Nombre d'observations considérées comme des outliers à moins 25% non-contigus : 4\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Cap Unemployment outliers at the IQR fences (Q1 - k * IQR, Q3 + k * IQR).\n", "unemployment = data['Unemployment']\n", "Q1, Q3 = unemployment.quantile([0.25, 0.75])\n", "IQR = Q3 - Q1\n", "\n", "k = 1.5 # threshold factor that defines an outlier\n", "lower_limit = Q1 - k * IQR\n", "upper_limit = Q3 + k * IQR\n", "\n", "# Rows outside either fence, selected before any value is capped.\n", "outliers = data[(unemployment < lower_limit) | (unemployment > upper_limit)]\n", "\n", "# Split the outliers into categories using fixed thresholds.\n", "upper_limit2 = 14\n", "lower_limit2 = 4\n", "\n", "outliers_contigus = data[(unemployment > upper_limit) & (unemployment <= upper_limit2)]\n", "outliers_non_contigus = data[unemployment > upper_limit2]\n", "outliers_below_Q1 = data[unemployment < lower_limit2]\n", "print(\"Nombre d'observations considérées comme des outliers à plus de 75% non-contigus : \", len(outliers_non_contigus), \"\\n\")\n", "print(\"Nombre d'observations considérées comme des outliers à moins 25% non-contigus : \", len(outliers_below_Q1))\n", "\n", "# Values beyond a fence are replaced by that fence.\n", "data['Unemployment'] = unemployment.clip(lower=lower_limit, upper=upper_limit)\n", "\n", "sns.boxplot(data=data, x='Unemployment')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StoreDateDaily_SalesHoliday_FlagTemperatureFuel_PriceCPIUnemploymentYearMonthDay
012010-05-021643690.90042.312.572211.0963588.106201052
112010-12-021641957.44138.512.548211.2421708.1062010122
212010-02-191611968.17039.932.514211.2891438.1062010219
312010-02-261409727.59046.632.561211.3196438.1062010226
412010-05-031554806.68046.502.625211.3501438.106201053
....................................
6430452012-09-28713173.95064.883.997192.0135588.6842012928
6431452012-05-10733455.07064.893.985192.1704128.6672012510
6432452012-12-10734464.36054.474.000192.3272658.66720121210
6433452012-10-19718125.53056.473.969192.3308548.66720121019
6434452012-10-26760281.43058.853.882192.3088998.66720121026
\n", "

6435 rows × 11 columns

\n", "
" ], "text/plain": [ " Store Date Daily_Sales Holiday_Flag Temperature Fuel_Price \\\n", "0 1 2010-05-02 1643690.90 0 42.31 2.572 \n", "1 1 2010-12-02 1641957.44 1 38.51 2.548 \n", "2 1 2010-02-19 1611968.17 0 39.93 2.514 \n", "3 1 2010-02-26 1409727.59 0 46.63 2.561 \n", "4 1 2010-05-03 1554806.68 0 46.50 2.625 \n", "... ... ... ... ... ... ... \n", "6430 45 2012-09-28 713173.95 0 64.88 3.997 \n", "6431 45 2012-05-10 733455.07 0 64.89 3.985 \n", "6432 45 2012-12-10 734464.36 0 54.47 4.000 \n", "6433 45 2012-10-19 718125.53 0 56.47 3.969 \n", "6434 45 2012-10-26 760281.43 0 58.85 3.882 \n", "\n", " CPI Unemployment Year Month Day \n", "0 211.096358 8.106 2010 5 2 \n", "1 211.242170 8.106 2010 12 2 \n", "2 211.289143 8.106 2010 2 19 \n", "3 211.319643 8.106 2010 2 26 \n", "4 211.350143 8.106 2010 5 3 \n", "... ... ... ... ... ... \n", "6430 192.013558 8.684 2012 9 28 \n", "6431 192.170412 8.667 2012 5 10 \n", "6432 192.327265 8.667 2012 12 10 \n", "6433 192.330854 8.667 2012 10 19 \n", "6434 192.308899 8.667 2012 10 26 \n", "\n", "[6435 rows x 11 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Derive Year / Month / Day features from the 'Date' column; the column is parsed once and reused.\n", "# NOTE(review): the first rows read 2010-05-02, 2010-12-02, 2010-02-19, 2010-02-26, 2010-05-03,\n", "# which is not chronological - day and month may be swapped in the CSV for days <= 12. Confirm\n", "# against the raw file before trusting the Month and Day features.\n", "parsed_dates = pd.to_datetime(data['Date'])\n", "data['Year'] = parsed_dates.dt.year\n", "data['Month'] = parsed_dates.dt.month\n", "data['Day'] = parsed_dates.dt.day\n", "\n", "data" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# The raw 'Date' column is dropped now that Year / Month / Day exist.\n", "data = data.drop(columns=['Date'])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StoreDaily_SalesHoliday_FlagTemperatureFuel_PriceCPIUnemploymentYearMonthDay
011643690.90042.312.572211.0963588.106201052
111641957.44138.512.548211.2421708.1062010122
211611968.17039.932.514211.2891438.1062010219
311409727.59046.632.561211.3196438.1062010226
411554806.68046.502.625211.3501438.106201053
.................................
643045713173.95064.883.997192.0135588.6842012928
643145733455.07064.893.985192.1704128.6672012510
643245734464.36054.474.000192.3272658.66720121210
643345718125.53056.473.969192.3308548.66720121019
643445760281.43058.853.882192.3088998.66720121026
\n", "

6435 rows × 10 columns

\n", "
" ], "text/plain": [ " Store Daily_Sales Holiday_Flag Temperature Fuel_Price CPI \\\n", "0 1 1643690.90 0 42.31 2.572 211.096358 \n", "1 1 1641957.44 1 38.51 2.548 211.242170 \n", "2 1 1611968.17 0 39.93 2.514 211.289143 \n", "3 1 1409727.59 0 46.63 2.561 211.319643 \n", "4 1 1554806.68 0 46.50 2.625 211.350143 \n", "... ... ... ... ... ... ... \n", "6430 45 713173.95 0 64.88 3.997 192.013558 \n", "6431 45 733455.07 0 64.89 3.985 192.170412 \n", "6432 45 734464.36 0 54.47 4.000 192.327265 \n", "6433 45 718125.53 0 56.47 3.969 192.330854 \n", "6434 45 760281.43 0 58.85 3.882 192.308899 \n", "\n", " Unemployment Year Month Day \n", "0 8.106 2010 5 2 \n", "1 8.106 2010 12 2 \n", "2 8.106 2010 2 19 \n", "3 8.106 2010 2 26 \n", "4 8.106 2010 5 3 \n", "... ... ... ... ... \n", "6430 8.684 2012 9 28 \n", "6431 8.667 2012 5 10 \n", "6432 8.667 2012 12 10 \n", "6433 8.667 2012 10 19 \n", "6434 8.667 2012 10 26 \n", "\n", "[6435 rows x 10 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Store int64\n", "Daily_Sales float64\n", "Holiday_Flag int64\n", "Temperature float64\n", "Fuel_Price float64\n", "CPI float64\n", "Unemployment float64\n", "Year int64\n", "Month int64\n", "Day int64\n", "dtype: object" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.dtypes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pycaret Linear Regression Model Elaboration" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from pycaret.regression import *" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0Session id8701
1TargetDaily_Sales
2Target typeRegression
3Original data shape(6435, 10)
4Transformed data shape(6435, 10)
5Transformed train set shape(4504, 10)
6Transformed test set shape(1931, 10)
7Numeric features9
8PreprocessTrue
9Imputation typesimple
10Numeric imputationmean
11Categorical imputationmode
12NormalizeTrue
13Normalize methodminmax
14Fold GeneratorKFold
15Fold Number10
16CPU Jobs-1
17Use GPUFalse
18Log ExperimentFalse
19Experiment Namereg-default-name
20USIb442
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Pin the experiment seed so the train/test split, CV folds and model scores are reproducible.\n", "# 8701 is the session id shown in this cell's recorded output (PyCaret picks one at random when none is given).\n", "SESSION_ID = 8701\n", "reg = setup(data, target='Daily_Sales', normalize=True, normalize_method='minmax', session_id=SESSION_ID)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n",
       "             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,\n",
       "             early_stopping_rounds=None, enable_categorical=False,\n",
       "             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n",
       "             importance_type=None, interaction_constraints='',\n",
       "             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,\n",
       "             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,\n",
       "             missing=nan, monotone_constraints='()', n_estimators=100,\n",
       "             n_jobs=-1, num_parallel_tree=1, predictor='auto',\n",
       "             random_state=8701, reg_alpha=0, reg_lambda=1, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n", " colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n", " importance_type=None, interaction_constraints='',\n", " learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,\n", " max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,\n", " missing=nan, monotone_constraints='()', n_estimators=100,\n", " n_jobs=-1, num_parallel_tree=1, predictor='auto',\n", " random_state=8701, reg_alpha=0, reg_lambda=1, ...)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare_models()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Initiated. . . . . . . . . . . . . . . . . .18:08:49
Status. . . . . . . . . . . . . . . . . .Selecting Estimator
Estimator. . . . . . . . . . . . . . . . . .Compiling Library
\n", "
" ], "text/plain": [ " \n", " \n", "Initiated . . . . . . . . . . . . . . . . . . 18:08:49\n", "Status . . . . . . . . . . . . . . . . . . Selecting Estimator\n", "Estimator . . . . . . . . . . . . . . . . . . Compiling Library" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MAEMSERMSER2RMSLEMAPE
Fold      
052721.73837094510592.000084228.91410.97570.10590.0613
150161.01957583405056.000087082.75000.97530.08600.0557
262588.339811270412288.0000106162.19530.96440.09560.0628
354049.58598522477568.000092317.26560.97360.08350.0559
450440.29306355119616.000079719.00780.97950.09850.0568
555670.25789608662016.000098023.78120.96910.10010.0625
651194.22666248470016.000079047.26560.98090.08780.0569
752440.38286610165248.000081302.92190.97870.08490.0570
853432.12506348822016.000079679.49220.97790.18390.0577
947908.01955803044352.000076177.71880.98210.07880.0527
Mean53060.59887544508876.800086374.13120.97570.10050.0579
Std3801.44411665330173.29979166.14740.00520.02900.0031
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model=create_model(\"xgboost\")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "448436a0b16c4954a749d2fd6ab817eb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "evaluate_model(model)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MAEMSERMSER2RMSLEMAPE
Fold      
072144.234413923403776.0000117997.47660.95240.12320.0855
167364.046913027191808.0000114136.72660.95750.10790.0743
278961.507816434673664.0000128197.78910.94820.11320.0808
372114.859416118704128.0000126959.46090.95010.11290.0774
463425.82039442840576.000097174.28120.96950.09700.0695
566351.460913681622016.0000116968.46880.95600.11070.0740
658300.68368188753920.000090491.73440.97500.09040.0624
765851.570310858310656.0000104203.21880.96510.10240.0723
863356.656210729723904.0000103584.38280.96260.09310.0660
960723.425810246772736.0000101226.34380.96830.09080.0660
Mean66859.426612265199718.4000110093.98830.96050.10420.0728
Std5818.06862656402186.304012021.38740.00860.01060.0068
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).\n" ] }, { "data": { "text/html": [ "
XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n",
       "             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,\n",
       "             early_stopping_rounds=None, enable_categorical=False,\n",
       "             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n",
       "             importance_type=None, interaction_constraints='',\n",
       "             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,\n",
       "             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,\n",
       "             missing=nan, monotone_constraints='()', n_estimators=100,\n",
       "             n_jobs=-1, num_parallel_tree=1, predictor='auto',\n",
       "             random_state=8701, reg_alpha=0, reg_lambda=1, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,\n", " colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,\n", " early_stopping_rounds=None, enable_categorical=False,\n", " eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',\n", " importance_type=None, interaction_constraints='',\n", " learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,\n", " max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,\n", " missing=nan, monotone_constraints='()', n_estimators=100,\n", " n_jobs=-1, num_parallel_tree=1, predictor='auto',\n", " random_state=8701, reg_alpha=0, reg_lambda=1, ...)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tune_model(model, search_library='optuna')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Daily Sales based on period between two dates" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StoreDaily_SalesHoliday_FlagTemperatureFuel_PriceCPIUnemploymentYearMonthDay
3411453329.50071.892.603211.6719897.83802010110
17721827440.43069.242.603211.3298748.16302010110
3203358784.10073.602.603214.9846557.56402010110
46341842821.02063.962.619126.2346007.12702010110
6065283178.12071.102.603212.2269466.76802010110
74961328468.89070.692.603213.1907137.00702010110
8927448998.73049.992.759190.6738249.13702010110
10358804105.49068.702.603215.0310036.43302010110
11789495692.19069.082.603215.2141346.56002010110
1321101645892.97086.013.001126.2346009.00302010110
1464111182490.46075.112.603214.9846557.56402010110
160712850936.26085.203.001126.23460011.21852010110
1750131765584.48068.742.853126.2346007.79502010110
1893141855703.66070.582.707182.7168388.72402010110
203615566945.95059.692.840132.7568008.06702010110
217916463977.54059.392.759190.6738246.98602010110
232217829207.27060.072.853126.2346006.88502010110
246518948977.50067.762.717132.7568009.33102010110
2608191379456.30059.912.840132.7568008.06702010110
2751201933719.21061.082.707204.8850977.48402010110
289421677158.39070.282.603211.3298748.16302010110
303722905987.17069.312.717136.6297578.57202010110
3180231129909.44062.072.717132.7568005.28702010110
3323241215273.20066.882.840132.7568008.27502010110
346625658640.14057.562.707204.8850977.48402010110
360926923221.52057.802.717132.7568008.14902010110
3752271543532.83070.192.840136.6297578.02102010110
3895281203080.41085.203.001126.23460011.21852010110
403829474698.01069.312.717132.75680010.52402010110
418130445475.30070.282.603211.3298748.16302010110
4324311213981.64070.282.603211.3298748.16302010110
4467321061089.56066.142.759190.6738249.13702010110
461033224294.39091.453.001126.2346009.26502010110
475334865709.11070.132.619126.23460010.21002010110
489635771065.21070.192.707136.6297578.76302010110
503936422169.47074.662.567210.4404438.47602010110
518237529877.93074.662.603210.4404438.47602010110
532538360256.58085.203.001126.23460011.21852010110
5468391219583.91072.742.603210.4404438.47602010110
561140891152.33062.012.717132.7568005.28702010110
5754411109216.35062.672.759190.6738247.50802010110
589742481523.93086.013.001126.2346009.00302010110
604043657108.77077.932.603203.59342910.21002010110
618344300152.45068.742.853126.2346007.61002010110
632645690007.76070.582.707182.7168388.72402010110
\n", "
" ], "text/plain": [ " Store Daily_Sales Holiday_Flag Temperature Fuel_Price CPI \\\n", "34 1 1453329.50 0 71.89 2.603 211.671989 \n", "177 2 1827440.43 0 69.24 2.603 211.329874 \n", "320 3 358784.10 0 73.60 2.603 214.984655 \n", "463 4 1842821.02 0 63.96 2.619 126.234600 \n", "606 5 283178.12 0 71.10 2.603 212.226946 \n", "749 6 1328468.89 0 70.69 2.603 213.190713 \n", "892 7 448998.73 0 49.99 2.759 190.673824 \n", "1035 8 804105.49 0 68.70 2.603 215.031003 \n", "1178 9 495692.19 0 69.08 2.603 215.214134 \n", "1321 10 1645892.97 0 86.01 3.001 126.234600 \n", "1464 11 1182490.46 0 75.11 2.603 214.984655 \n", "1607 12 850936.26 0 85.20 3.001 126.234600 \n", "1750 13 1765584.48 0 68.74 2.853 126.234600 \n", "1893 14 1855703.66 0 70.58 2.707 182.716838 \n", "2036 15 566945.95 0 59.69 2.840 132.756800 \n", "2179 16 463977.54 0 59.39 2.759 190.673824 \n", "2322 17 829207.27 0 60.07 2.853 126.234600 \n", "2465 18 948977.50 0 67.76 2.717 132.756800 \n", "2608 19 1379456.30 0 59.91 2.840 132.756800 \n", "2751 20 1933719.21 0 61.08 2.707 204.885097 \n", "2894 21 677158.39 0 70.28 2.603 211.329874 \n", "3037 22 905987.17 0 69.31 2.717 136.629757 \n", "3180 23 1129909.44 0 62.07 2.717 132.756800 \n", "3323 24 1215273.20 0 66.88 2.840 132.756800 \n", "3466 25 658640.14 0 57.56 2.707 204.885097 \n", "3609 26 923221.52 0 57.80 2.717 132.756800 \n", "3752 27 1543532.83 0 70.19 2.840 136.629757 \n", "3895 28 1203080.41 0 85.20 3.001 126.234600 \n", "4038 29 474698.01 0 69.31 2.717 132.756800 \n", "4181 30 445475.30 0 70.28 2.603 211.329874 \n", "4324 31 1213981.64 0 70.28 2.603 211.329874 \n", "4467 32 1061089.56 0 66.14 2.759 190.673824 \n", "4610 33 224294.39 0 91.45 3.001 126.234600 \n", "4753 34 865709.11 0 70.13 2.619 126.234600 \n", "4896 35 771065.21 0 70.19 2.707 136.629757 \n", "5039 36 422169.47 0 74.66 2.567 210.440443 \n", "5182 37 529877.93 0 74.66 2.603 210.440443 \n", "5325 38 360256.58 0 85.20 3.001 126.234600 \n", "5468 39 1219583.91 0 72.74 2.603 210.440443 \n", 
"5611 40 891152.33 0 62.01 2.717 132.756800 \n", "5754 41 1109216.35 0 62.67 2.759 190.673824 \n", "5897 42 481523.93 0 86.01 3.001 126.234600 \n", "6040 43 657108.77 0 77.93 2.603 203.593429 \n", "6183 44 300152.45 0 68.74 2.853 126.234600 \n", "6326 45 690007.76 0 70.58 2.707 182.716838 \n", "\n", " Unemployment Year Month Day \n", "34 7.8380 2010 1 10 \n", "177 8.1630 2010 1 10 \n", "320 7.5640 2010 1 10 \n", "463 7.1270 2010 1 10 \n", "606 6.7680 2010 1 10 \n", "749 7.0070 2010 1 10 \n", "892 9.1370 2010 1 10 \n", "1035 6.4330 2010 1 10 \n", "1178 6.5600 2010 1 10 \n", "1321 9.0030 2010 1 10 \n", "1464 7.5640 2010 1 10 \n", "1607 11.2185 2010 1 10 \n", "1750 7.7950 2010 1 10 \n", "1893 8.7240 2010 1 10 \n", "2036 8.0670 2010 1 10 \n", "2179 6.9860 2010 1 10 \n", "2322 6.8850 2010 1 10 \n", "2465 9.3310 2010 1 10 \n", "2608 8.0670 2010 1 10 \n", "2751 7.4840 2010 1 10 \n", "2894 8.1630 2010 1 10 \n", "3037 8.5720 2010 1 10 \n", "3180 5.2870 2010 1 10 \n", "3323 8.2750 2010 1 10 \n", "3466 7.4840 2010 1 10 \n", "3609 8.1490 2010 1 10 \n", "3752 8.0210 2010 1 10 \n", "3895 11.2185 2010 1 10 \n", "4038 10.5240 2010 1 10 \n", "4181 8.1630 2010 1 10 \n", "4324 8.1630 2010 1 10 \n", "4467 9.1370 2010 1 10 \n", "4610 9.2650 2010 1 10 \n", "4753 10.2100 2010 1 10 \n", "4896 8.7630 2010 1 10 \n", "5039 8.4760 2010 1 10 \n", "5182 8.4760 2010 1 10 \n", "5325 11.2185 2010 1 10 \n", "5468 8.4760 2010 1 10 \n", "5611 5.2870 2010 1 10 \n", "5754 7.5080 2010 1 10 \n", "5897 9.0030 2010 1 10 \n", "6040 10.2100 2010 1 10 \n", "6183 7.6100 2010 1 10 \n", "6326 8.7240 2010 1 10 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "start_date = pd.to_datetime('2010-01-01')\n",
 "end_date = pd.to_datetime('2010-01-10')\n",
 "\n",
 "# Rebuild one calendar date per row from the Year/Month/Day columns so the range test\n",
 "# compares whole dates. Comparing year, month and day independently would drop valid rows\n",
 "# whenever the period crosses a month or year boundary (e.g. Jan 20 -> Feb 5).\n",
 "row_dates = pd.to_datetime(data[['Year', 'Month', 'Day']].rename(columns=str.lower))\n",
 "\n",
 "# Series.between is inclusive on both ends, matching the >= / <= bounds of the period.\n",
 "filtered_df = data[row_dates.between(start_date, end_date)]\n",
 "filtered_df"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let us try to predict Daily_Sales" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " The predicted Daily Sales are :$ 688340.0\n" ] } ], "source": [
 "# Single observation to score: store 45 on 2010-01-10, i.e. the feature values of the\n",
 "# last row of filtered_df above (actual Daily_Sales: 690007.76).\n",
 "new_data = pd.DataFrame([[45, 0, 70.58, 2.707, 182.716838, 8.724, 2010, 1, 10]],\n",
 "                        columns=['Store', 'Holiday_Flag', 'Temperature',\n",
 "                                 'Fuel_Price', 'CPI', 'Unemployment', 'Year', 'Month', 'Day'])\n",
 "predictions = predict_model(model, data=new_data)\n",
 "\n",
 "# Read the predicted value for that single row from the 'prediction_label' column.\n",
 "y_temp_predict = predictions['prediction_label'].values[0]\n",
 "print(f\" The predicted Daily Sales are :$ {np.round(y_temp_predict)}\")\n"
 ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Saved\n" ] }, { "data": { "text/plain": [ "(Pipeline(memory=Memory(location=None),\n", " steps=[('numerical_imputer',\n", " TransformerWrapper(include=['Store', 'Holiday_Flag',\n", " 'Temperature', 'Fuel_Price', 'CPI',\n", " 'Unemployment', 'Year', 'Month',\n", " 'Day'],\n", " transformer=SimpleImputer())),\n", " ('categorical_imputer',\n", " TransformerWrapper(include=[],\n", " transformer=SimpleImputer(strategy='most_frequent'))),\n", " ('normalize', TransformerWr...\n", " gamma=0, gpu_id=-1, grow_policy='depthwise',\n", " importance_type=None, interaction_constraints='',\n", " learning_rate=0.300000012, max_bin=256,\n", " max_cat_to_onehot=4, 
max_delta_step=0,\n", " max_depth=6, max_leaves=0, min_child_weight=1,\n", " missing=nan, monotone_constraints='()',\n", " n_estimators=100, n_jobs=-1, num_parallel_tree=1,\n", " predictor='auto', random_state=8701, reg_alpha=0,\n", " reg_lambda=1, ...))]),\n", " './models/Walmart.pkl')" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "save_model(model,\"./models/Walmart\")" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [
 "import pickle\n",
 "\n",
 "# NOTE(review): this pickles only the estimator object held in `model`, whereas save_model\n",
 "# above stores the full pipeline (imputers, 'normalize' step, estimator). That is the likely\n",
 "# reason the pickled model predicts differently on raw features -- confirm before serving it.\n",
 "# The context manager guarantees the file is flushed and closed once the dump finishes.\n",
 "with open('./models/walmart_model.pkl', 'wb') as model_file:\n",
 "    pickle.dump(model, model_file)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Monsieur, j'ai choisi d'utiliser sur Streamlit le modèle sans dump, ceci pour une bonne raison. Pour la prédiction juste avant le save model, nous avons comme daily sales : **\\$688340.0**. Sur Streamlit, nous avons **\\$688339.6** avec le modèle sans dump. Tandis qu'avec le modèle pickle dumpé on a **\\$620000.0**, ce qui est trop éloigné de notre valeur réelle qui est : **\\$690007.76**." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "2024 - All rights reserved - Bekombo Ntone Louis Jason Loic Walmart Daily_Sales Prediction Notebook" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }