Spaces:
Sleeping
Sleeping
JAGPREET SINGH
committed on
Commit
•
24a7a62
1
Parent(s):
ad6a5c4
eda2
Browse files- notebook/EDA.ipynb +67 -23
notebook/EDA.ipynb
CHANGED
@@ -30,13 +30,14 @@
|
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
-
"execution_count":
|
34 |
"metadata": {},
|
35 |
"outputs": [],
|
36 |
"source": [
|
37 |
"import numpy as np\n",
|
38 |
"import pandas as pd\n",
|
39 |
"import matplotlib.pyplot as plt\n",
|
|
|
40 |
"%matplotlib inline\n",
|
41 |
"import warnings\n",
|
42 |
"warnings.filterwarnings('ignore')"
|
@@ -51,7 +52,7 @@
|
|
51 |
},
|
52 |
{
|
53 |
"cell_type": "code",
|
54 |
-
"execution_count":
|
55 |
"metadata": {},
|
56 |
"outputs": [],
|
57 |
"source": [
|
@@ -67,7 +68,7 @@
|
|
67 |
},
|
68 |
{
|
69 |
"cell_type": "code",
|
70 |
-
"execution_count":
|
71 |
"metadata": {},
|
72 |
"outputs": [
|
73 |
{
|
@@ -177,7 +178,7 @@
|
|
177 |
"4 none 76 78 75 "
|
178 |
]
|
179 |
},
|
180 |
-
"execution_count":
|
181 |
"metadata": {},
|
182 |
"output_type": "execute_result"
|
183 |
}
|
@@ -188,7 +189,7 @@
|
|
188 |
},
|
189 |
{
|
190 |
"cell_type": "code",
|
191 |
-
"execution_count":
|
192 |
"metadata": {},
|
193 |
"outputs": [
|
194 |
{
|
@@ -298,7 +299,7 @@
|
|
298 |
"999 none 77 86 86 "
|
299 |
]
|
300 |
},
|
301 |
-
"execution_count":
|
302 |
"metadata": {},
|
303 |
"output_type": "execute_result"
|
304 |
}
|
@@ -331,7 +332,7 @@
|
|
331 |
},
|
332 |
{
|
333 |
"cell_type": "code",
|
334 |
-
"execution_count":
|
335 |
"metadata": {},
|
336 |
"outputs": [
|
337 |
{
|
@@ -340,7 +341,7 @@
|
|
340 |
"(1000, 8)"
|
341 |
]
|
342 |
},
|
343 |
-
"execution_count":
|
344 |
"metadata": {},
|
345 |
"output_type": "execute_result"
|
346 |
}
|
@@ -371,7 +372,7 @@
|
|
371 |
},
|
372 |
{
|
373 |
"cell_type": "code",
|
374 |
-
"execution_count":
|
375 |
"metadata": {},
|
376 |
"outputs": [
|
377 |
{
|
@@ -388,7 +389,7 @@
|
|
388 |
"dtype: float64"
|
389 |
]
|
390 |
},
|
391 |
-
"execution_count":
|
392 |
"metadata": {},
|
393 |
"output_type": "execute_result"
|
394 |
}
|
@@ -414,7 +415,7 @@
|
|
414 |
},
|
415 |
{
|
416 |
"cell_type": "code",
|
417 |
-
"execution_count":
|
418 |
"metadata": {},
|
419 |
"outputs": [
|
420 |
{
|
@@ -423,7 +424,7 @@
|
|
423 |
"0"
|
424 |
]
|
425 |
},
|
426 |
-
"execution_count":
|
427 |
"metadata": {},
|
428 |
"output_type": "execute_result"
|
429 |
}
|
@@ -449,7 +450,7 @@
|
|
449 |
},
|
450 |
{
|
451 |
"cell_type": "code",
|
452 |
-
"execution_count":
|
453 |
"metadata": {},
|
454 |
"outputs": [
|
455 |
{
|
@@ -496,7 +497,7 @@
|
|
496 |
},
|
497 |
{
|
498 |
"cell_type": "code",
|
499 |
-
"execution_count":
|
500 |
"metadata": {},
|
501 |
"outputs": [
|
502 |
{
|
@@ -513,7 +514,7 @@
|
|
513 |
"dtype: int64"
|
514 |
]
|
515 |
},
|
516 |
-
"execution_count":
|
517 |
"metadata": {},
|
518 |
"output_type": "execute_result"
|
519 |
}
|
@@ -540,7 +541,7 @@
|
|
540 |
},
|
541 |
{
|
542 |
"cell_type": "code",
|
543 |
-
"execution_count":
|
544 |
"metadata": {},
|
545 |
"outputs": [
|
546 |
{
|
@@ -619,7 +620,7 @@
|
|
619 |
"writing_score 1000.0 68.054 15.195657 10.0 57.75 69.0 79.0 100.0"
|
620 |
]
|
621 |
},
|
622 |
-
"execution_count":
|
623 |
"metadata": {},
|
624 |
"output_type": "execute_result"
|
625 |
}
|
@@ -639,7 +640,7 @@
|
|
639 |
},
|
640 |
{
|
641 |
"cell_type": "code",
|
642 |
-
"execution_count":
|
643 |
"metadata": {},
|
644 |
"outputs": [
|
645 |
{
|
@@ -676,7 +677,7 @@
|
|
676 |
},
|
677 |
{
|
678 |
"cell_type": "code",
|
679 |
-
"execution_count":
|
680 |
"metadata": {},
|
681 |
"outputs": [
|
682 |
{
|
@@ -711,7 +712,7 @@
|
|
711 |
},
|
712 |
{
|
713 |
"cell_type": "code",
|
714 |
-
"execution_count":
|
715 |
"metadata": {},
|
716 |
"outputs": [
|
717 |
{
|
@@ -821,7 +822,7 @@
|
|
821 |
"4 none 76 78 75 "
|
822 |
]
|
823 |
},
|
824 |
-
"execution_count":
|
825 |
"metadata": {},
|
826 |
"output_type": "execute_result"
|
827 |
}
|
@@ -832,7 +833,7 @@
|
|
832 |
},
|
833 |
{
|
834 |
"cell_type": "code",
|
835 |
-
"execution_count":
|
836 |
"metadata": {},
|
837 |
"outputs": [
|
838 |
{
|
@@ -858,7 +859,7 @@
|
|
858 |
},
|
859 |
{
|
860 |
"cell_type": "code",
|
861 |
-
"execution_count":
|
862 |
"metadata": {},
|
863 |
"outputs": [
|
864 |
{
|
@@ -891,6 +892,49 @@
|
|
891 |
"- students have performed worst in maths.\n"
|
892 |
]
|
893 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
894 |
{
|
895 |
"cell_type": "code",
|
896 |
"execution_count": null,
|
|
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
+
"execution_count": 17,
|
34 |
"metadata": {},
|
35 |
"outputs": [],
|
36 |
"source": [
|
37 |
"import numpy as np\n",
|
38 |
"import pandas as pd\n",
|
39 |
"import matplotlib.pyplot as plt\n",
|
40 |
+
"import seaborn as sns\n",
|
41 |
"%matplotlib inline\n",
|
42 |
"import warnings\n",
|
43 |
"warnings.filterwarnings('ignore')"
|
|
|
52 |
},
|
53 |
{
|
54 |
"cell_type": "code",
|
55 |
+
"execution_count": 2,
|
56 |
"metadata": {},
|
57 |
"outputs": [],
|
58 |
"source": [
|
|
|
68 |
},
|
69 |
{
|
70 |
"cell_type": "code",
|
71 |
+
"execution_count": 3,
|
72 |
"metadata": {},
|
73 |
"outputs": [
|
74 |
{
|
|
|
178 |
"4 none 76 78 75 "
|
179 |
]
|
180 |
},
|
181 |
+
"execution_count": 3,
|
182 |
"metadata": {},
|
183 |
"output_type": "execute_result"
|
184 |
}
|
|
|
189 |
},
|
190 |
{
|
191 |
"cell_type": "code",
|
192 |
+
"execution_count": 4,
|
193 |
"metadata": {},
|
194 |
"outputs": [
|
195 |
{
|
|
|
299 |
"999 none 77 86 86 "
|
300 |
]
|
301 |
},
|
302 |
+
"execution_count": 4,
|
303 |
"metadata": {},
|
304 |
"output_type": "execute_result"
|
305 |
}
|
|
|
332 |
},
|
333 |
{
|
334 |
"cell_type": "code",
|
335 |
+
"execution_count": 5,
|
336 |
"metadata": {},
|
337 |
"outputs": [
|
338 |
{
|
|
|
341 |
"(1000, 8)"
|
342 |
]
|
343 |
},
|
344 |
+
"execution_count": 5,
|
345 |
"metadata": {},
|
346 |
"output_type": "execute_result"
|
347 |
}
|
|
|
372 |
},
|
373 |
{
|
374 |
"cell_type": "code",
|
375 |
+
"execution_count": 6,
|
376 |
"metadata": {},
|
377 |
"outputs": [
|
378 |
{
|
|
|
389 |
"dtype: float64"
|
390 |
]
|
391 |
},
|
392 |
+
"execution_count": 6,
|
393 |
"metadata": {},
|
394 |
"output_type": "execute_result"
|
395 |
}
|
|
|
415 |
},
|
416 |
{
|
417 |
"cell_type": "code",
|
418 |
+
"execution_count": 7,
|
419 |
"metadata": {},
|
420 |
"outputs": [
|
421 |
{
|
|
|
424 |
"0"
|
425 |
]
|
426 |
},
|
427 |
+
"execution_count": 7,
|
428 |
"metadata": {},
|
429 |
"output_type": "execute_result"
|
430 |
}
|
|
|
450 |
},
|
451 |
{
|
452 |
"cell_type": "code",
|
453 |
+
"execution_count": 8,
|
454 |
"metadata": {},
|
455 |
"outputs": [
|
456 |
{
|
|
|
497 |
},
|
498 |
{
|
499 |
"cell_type": "code",
|
500 |
+
"execution_count": 9,
|
501 |
"metadata": {},
|
502 |
"outputs": [
|
503 |
{
|
|
|
514 |
"dtype: int64"
|
515 |
]
|
516 |
},
|
517 |
+
"execution_count": 9,
|
518 |
"metadata": {},
|
519 |
"output_type": "execute_result"
|
520 |
}
|
|
|
541 |
},
|
542 |
{
|
543 |
"cell_type": "code",
|
544 |
+
"execution_count": 10,
|
545 |
"metadata": {},
|
546 |
"outputs": [
|
547 |
{
|
|
|
620 |
"writing_score 1000.0 68.054 15.195657 10.0 57.75 69.0 79.0 100.0"
|
621 |
]
|
622 |
},
|
623 |
+
"execution_count": 10,
|
624 |
"metadata": {},
|
625 |
"output_type": "execute_result"
|
626 |
}
|
|
|
640 |
},
|
641 |
{
|
642 |
"cell_type": "code",
|
643 |
+
"execution_count": 11,
|
644 |
"metadata": {},
|
645 |
"outputs": [
|
646 |
{
|
|
|
677 |
},
|
678 |
{
|
679 |
"cell_type": "code",
|
680 |
+
"execution_count": 12,
|
681 |
"metadata": {},
|
682 |
"outputs": [
|
683 |
{
|
|
|
712 |
},
|
713 |
{
|
714 |
"cell_type": "code",
|
715 |
+
"execution_count": 13,
|
716 |
"metadata": {},
|
717 |
"outputs": [
|
718 |
{
|
|
|
822 |
"4 none 76 78 75 "
|
823 |
]
|
824 |
},
|
825 |
+
"execution_count": 13,
|
826 |
"metadata": {},
|
827 |
"output_type": "execute_result"
|
828 |
}
|
|
|
833 |
},
|
834 |
{
|
835 |
"cell_type": "code",
|
836 |
+
"execution_count": 14,
|
837 |
"metadata": {},
|
838 |
"outputs": [
|
839 |
{
|
|
|
859 |
},
|
860 |
{
|
861 |
"cell_type": "code",
|
862 |
+
"execution_count": 15,
|
863 |
"metadata": {},
|
864 |
"outputs": [
|
865 |
{
|
|
|
892 |
"- students have performed worst in maths.\n"
|
893 |
]
|
894 |
},
|
895 |
+
{
|
896 |
+
"cell_type": "markdown",
|
897 |
+
"metadata": {},
|
898 |
+
"source": [
|
899 |
+
"#### 4.1 EDA (Visualisation)"
|
900 |
+
]
|
901 |
+
},
|
902 |
+
{
|
903 |
+
"cell_type": "markdown",
|
904 |
+
"metadata": {},
|
905 |
+
"source": [
|
906 |
+
"- Visualize score distribution to make conclusions"
|
907 |
+
]
|
908 |
+
},
|
909 |
+
{
|
910 |
+
"cell_type": "markdown",
|
911 |
+
"metadata": {},
|
912 |
+
"source": [
|
913 |
+
"- Histogram\n",
|
914 |
+
"- Kernal Distribution Function (KDE)"
|
915 |
+
]
|
916 |
+
},
|
917 |
+
{
|
918 |
+
"cell_type": "markdown",
|
919 |
+
"metadata": {},
|
920 |
+
"source": [
|
921 |
+
"for col in numerical_cols:\n",
|
922 |
+
" fig, axs = plt.subplots(1, 2, figsize=(15, 7))\n",
|
923 |
+
" plt.subplot(121)\n",
|
924 |
+
" sns.histplot(data=df, x=col,bins=30,kde=True,color='g')\n",
|
925 |
+
" plt.subplot(122)\n",
|
926 |
+
" sns.histplot(data=df,x=col,kde=True,hue='gender')\n",
|
927 |
+
" plt.show()"
|
928 |
+
]
|
929 |
+
},
|
930 |
+
{
|
931 |
+
"cell_type": "markdown",
|
932 |
+
"metadata": {},
|
933 |
+
"source": [
|
934 |
+
"### observation\n",
|
935 |
+
"- female students tend to perform better than male students across all subjects"
|
936 |
+
]
|
937 |
+
},
|
938 |
{
|
939 |
"cell_type": "code",
|
940 |
"execution_count": null,
|