JAGPREET SINGH committed on
Commit
24a7a62
1 Parent(s): ad6a5c4
Files changed (1) hide show
  1. notebook/EDA.ipynb +67 -23
notebook/EDA.ipynb CHANGED
@@ -30,13 +30,14 @@
30
  },
31
  {
32
  "cell_type": "code",
33
- "execution_count": 14,
34
  "metadata": {},
35
  "outputs": [],
36
  "source": [
37
  "import numpy as np\n",
38
  "import pandas as pd\n",
39
  "import matplotlib.pyplot as plt\n",
 
40
  "%matplotlib inline\n",
41
  "import warnings\n",
42
  "warnings.filterwarnings('ignore')"
@@ -51,7 +52,7 @@
51
  },
52
  {
53
  "cell_type": "code",
54
- "execution_count": 15,
55
  "metadata": {},
56
  "outputs": [],
57
  "source": [
@@ -67,7 +68,7 @@
67
  },
68
  {
69
  "cell_type": "code",
70
- "execution_count": 18,
71
  "metadata": {},
72
  "outputs": [
73
  {
@@ -177,7 +178,7 @@
177
  "4 none 76 78 75 "
178
  ]
179
  },
180
- "execution_count": 18,
181
  "metadata": {},
182
  "output_type": "execute_result"
183
  }
@@ -188,7 +189,7 @@
188
  },
189
  {
190
  "cell_type": "code",
191
- "execution_count": 17,
192
  "metadata": {},
193
  "outputs": [
194
  {
@@ -298,7 +299,7 @@
298
  "999 none 77 86 86 "
299
  ]
300
  },
301
- "execution_count": 17,
302
  "metadata": {},
303
  "output_type": "execute_result"
304
  }
@@ -331,7 +332,7 @@
331
  },
332
  {
333
  "cell_type": "code",
334
- "execution_count": 19,
335
  "metadata": {},
336
  "outputs": [
337
  {
@@ -340,7 +341,7 @@
340
  "(1000, 8)"
341
  ]
342
  },
343
- "execution_count": 19,
344
  "metadata": {},
345
  "output_type": "execute_result"
346
  }
@@ -371,7 +372,7 @@
371
  },
372
  {
373
  "cell_type": "code",
374
- "execution_count": 20,
375
  "metadata": {},
376
  "outputs": [
377
  {
@@ -388,7 +389,7 @@
388
  "dtype: float64"
389
  ]
390
  },
391
- "execution_count": 20,
392
  "metadata": {},
393
  "output_type": "execute_result"
394
  }
@@ -414,7 +415,7 @@
414
  },
415
  {
416
  "cell_type": "code",
417
- "execution_count": 24,
418
  "metadata": {},
419
  "outputs": [
420
  {
@@ -423,7 +424,7 @@
423
  "0"
424
  ]
425
  },
426
- "execution_count": 24,
427
  "metadata": {},
428
  "output_type": "execute_result"
429
  }
@@ -449,7 +450,7 @@
449
  },
450
  {
451
  "cell_type": "code",
452
- "execution_count": 12,
453
  "metadata": {},
454
  "outputs": [
455
  {
@@ -496,7 +497,7 @@
496
  },
497
  {
498
  "cell_type": "code",
499
- "execution_count": 25,
500
  "metadata": {},
501
  "outputs": [
502
  {
@@ -513,7 +514,7 @@
513
  "dtype: int64"
514
  ]
515
  },
516
- "execution_count": 25,
517
  "metadata": {},
518
  "output_type": "execute_result"
519
  }
@@ -540,7 +541,7 @@
540
  },
541
  {
542
  "cell_type": "code",
543
- "execution_count": 26,
544
  "metadata": {},
545
  "outputs": [
546
  {
@@ -619,7 +620,7 @@
619
  "writing_score 1000.0 68.054 15.195657 10.0 57.75 69.0 79.0 100.0"
620
  ]
621
  },
622
- "execution_count": 26,
623
  "metadata": {},
624
  "output_type": "execute_result"
625
  }
@@ -639,7 +640,7 @@
639
  },
640
  {
641
  "cell_type": "code",
642
- "execution_count": 36,
643
  "metadata": {},
644
  "outputs": [
645
  {
@@ -676,7 +677,7 @@
676
  },
677
  {
678
  "cell_type": "code",
679
- "execution_count": 35,
680
  "metadata": {},
681
  "outputs": [
682
  {
@@ -711,7 +712,7 @@
711
  },
712
  {
713
  "cell_type": "code",
714
- "execution_count": 27,
715
  "metadata": {},
716
  "outputs": [
717
  {
@@ -821,7 +822,7 @@
821
  "4 none 76 78 75 "
822
  ]
823
  },
824
- "execution_count": 27,
825
  "metadata": {},
826
  "output_type": "execute_result"
827
  }
@@ -832,7 +833,7 @@
832
  },
833
  {
834
  "cell_type": "code",
835
- "execution_count": 43,
836
  "metadata": {},
837
  "outputs": [
838
  {
@@ -858,7 +859,7 @@
858
  },
859
  {
860
  "cell_type": "code",
861
- "execution_count": 45,
862
  "metadata": {},
863
  "outputs": [
864
  {
@@ -891,6 +892,49 @@
891
  "- students have performed worst in maths.\n"
892
  ]
893
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
894
  {
895
  "cell_type": "code",
896
  "execution_count": null,
 
30
  },
31
  {
32
  "cell_type": "code",
33
+ "execution_count": 17,
34
  "metadata": {},
35
  "outputs": [],
36
  "source": [
37
  "import numpy as np\n",
38
  "import pandas as pd\n",
39
  "import matplotlib.pyplot as plt\n",
40
+ "import seaborn as sns\n",
41
  "%matplotlib inline\n",
42
  "import warnings\n",
43
  "warnings.filterwarnings('ignore')"
 
52
  },
53
  {
54
  "cell_type": "code",
55
+ "execution_count": 2,
56
  "metadata": {},
57
  "outputs": [],
58
  "source": [
 
68
  },
69
  {
70
  "cell_type": "code",
71
+ "execution_count": 3,
72
  "metadata": {},
73
  "outputs": [
74
  {
 
178
  "4 none 76 78 75 "
179
  ]
180
  },
181
+ "execution_count": 3,
182
  "metadata": {},
183
  "output_type": "execute_result"
184
  }
 
189
  },
190
  {
191
  "cell_type": "code",
192
+ "execution_count": 4,
193
  "metadata": {},
194
  "outputs": [
195
  {
 
299
  "999 none 77 86 86 "
300
  ]
301
  },
302
+ "execution_count": 4,
303
  "metadata": {},
304
  "output_type": "execute_result"
305
  }
 
332
  },
333
  {
334
  "cell_type": "code",
335
+ "execution_count": 5,
336
  "metadata": {},
337
  "outputs": [
338
  {
 
341
  "(1000, 8)"
342
  ]
343
  },
344
+ "execution_count": 5,
345
  "metadata": {},
346
  "output_type": "execute_result"
347
  }
 
372
  },
373
  {
374
  "cell_type": "code",
375
+ "execution_count": 6,
376
  "metadata": {},
377
  "outputs": [
378
  {
 
389
  "dtype: float64"
390
  ]
391
  },
392
+ "execution_count": 6,
393
  "metadata": {},
394
  "output_type": "execute_result"
395
  }
 
415
  },
416
  {
417
  "cell_type": "code",
418
+ "execution_count": 7,
419
  "metadata": {},
420
  "outputs": [
421
  {
 
424
  "0"
425
  ]
426
  },
427
+ "execution_count": 7,
428
  "metadata": {},
429
  "output_type": "execute_result"
430
  }
 
450
  },
451
  {
452
  "cell_type": "code",
453
+ "execution_count": 8,
454
  "metadata": {},
455
  "outputs": [
456
  {
 
497
  },
498
  {
499
  "cell_type": "code",
500
+ "execution_count": 9,
501
  "metadata": {},
502
  "outputs": [
503
  {
 
514
  "dtype: int64"
515
  ]
516
  },
517
+ "execution_count": 9,
518
  "metadata": {},
519
  "output_type": "execute_result"
520
  }
 
541
  },
542
  {
543
  "cell_type": "code",
544
+ "execution_count": 10,
545
  "metadata": {},
546
  "outputs": [
547
  {
 
620
  "writing_score 1000.0 68.054 15.195657 10.0 57.75 69.0 79.0 100.0"
621
  ]
622
  },
623
+ "execution_count": 10,
624
  "metadata": {},
625
  "output_type": "execute_result"
626
  }
 
640
  },
641
  {
642
  "cell_type": "code",
643
+ "execution_count": 11,
644
  "metadata": {},
645
  "outputs": [
646
  {
 
677
  },
678
  {
679
  "cell_type": "code",
680
+ "execution_count": 12,
681
  "metadata": {},
682
  "outputs": [
683
  {
 
712
  },
713
  {
714
  "cell_type": "code",
715
+ "execution_count": 13,
716
  "metadata": {},
717
  "outputs": [
718
  {
 
822
  "4 none 76 78 75 "
823
  ]
824
  },
825
+ "execution_count": 13,
826
  "metadata": {},
827
  "output_type": "execute_result"
828
  }
 
833
  },
834
  {
835
  "cell_type": "code",
836
+ "execution_count": 14,
837
  "metadata": {},
838
  "outputs": [
839
  {
 
859
  },
860
  {
861
  "cell_type": "code",
862
+ "execution_count": 15,
863
  "metadata": {},
864
  "outputs": [
865
  {
 
892
  "- students have performed worst in maths.\n"
893
  ]
894
  },
895
+ {
896
+ "cell_type": "markdown",
897
+ "metadata": {},
898
+ "source": [
899
+ "#### 4.1 EDA (Visualisation)"
900
+ ]
901
+ },
902
+ {
903
+ "cell_type": "markdown",
904
+ "metadata": {},
905
+ "source": [
906
+ "- Visualize score distributions to draw conclusions"
907
+ ]
908
+ },
909
+ {
910
+ "cell_type": "markdown",
911
+ "metadata": {},
912
+ "source": [
913
+ "- Histogram\n",
914
+ "- Kernel Density Estimate (KDE)"
915
+ ]
916
+ },
917
+ {
918
+ "cell_type": "markdown",
919
+ "metadata": {},
920
+ "source": [
921
+ "for col in numerical_cols:\n",
922
+ " fig, axs = plt.subplots(1, 2, figsize=(15, 7))\n",
923
+ " plt.subplot(121)\n",
924
+ " sns.histplot(data=df, x=col,bins=30,kde=True,color='g')\n",
925
+ " plt.subplot(122)\n",
926
+ " sns.histplot(data=df,x=col,kde=True,hue='gender')\n",
927
+ " plt.show()"
928
+ ]
929
+ },
930
+ {
931
+ "cell_type": "markdown",
932
+ "metadata": {},
933
+ "source": [
934
+ "### Observation\n",
935
+ "- female students tend to perform better than male students across all subjects"
936
+ ]
937
+ },
938
  {
939
  "cell_type": "code",
940
  "execution_count": null,