Spaces:

singhjagpreet
/

student-performance

Sleeping

App Files Files Community

JAGPREET SINGH committed on Sep 6, 2023

Commit

24a7a62

•

1 Parent(s): ad6a5c4

eda2

Browse files

Files changed (1) hide show

notebook/EDA.ipynb +67 -23

notebook/EDA.ipynb CHANGED Viewed

@@ -30,13 +30,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
     "%matplotlib inline\n",
     "import warnings\n",
     "warnings.filterwarnings('ignore')"
@@ -51,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -67,7 +68,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -177,7 +178,7 @@
        "4                    none          76             78             75  "
       ]
      },
-     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -188,7 +189,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -298,7 +299,7 @@
        "999                    none          77             86             86  "
       ]
      },
-     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -331,7 +332,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -340,7 +341,7 @@
        "(1000, 8)"
       ]
      },
-     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -371,7 +372,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -388,7 +389,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -414,7 +415,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -423,7 +424,7 @@
        "0"
       ]
      },
-     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -449,7 +450,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -496,7 +497,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -513,7 +514,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -540,7 +541,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -619,7 +620,7 @@
        "writing_score  1000.0  68.054  15.195657  10.0  57.75  69.0  79.0  100.0"
       ]
      },
-     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -639,7 +640,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
@@ -676,7 +677,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -711,7 +712,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -821,7 +822,7 @@
        "4                    none          76             78             75  "
       ]
      },
-     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -832,7 +833,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -858,7 +859,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
@@ -891,6 +892,49 @@
     "- students have performed worst in maths.\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,

   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
     "%matplotlib inline\n",
     "import warnings\n",
     "warnings.filterwarnings('ignore')"
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
        "4                    none          76             78             75  "
       ]
      },
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
        "999                    none          77             86             86  "
       ]
      },
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
        "(1000, 8)"
       ]
      },
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
        "dtype: float64"
       ]
      },
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
        "0"
       ]
      },
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
        "dtype: int64"
       ]
      },
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
        "writing_score  1000.0  68.054  15.195657  10.0  57.75  69.0  79.0  100.0"
       ]
      },
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
        "4                    none          76             78             75  "
       ]
      },
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
     "- students have performed worst in maths.\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 4.1 EDA (Visualisation)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- Visualize the score distributions to draw conclusions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- Histogram\n",
+    "- Kernel Density Estimate (KDE)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "for col in numerical_cols:\n",
+    "    fig, axs = plt.subplots(1, 2, figsize=(15, 7))\n",
+    "    plt.subplot(121)\n",
+    "    sns.histplot(data=df, x=col,bins=30,kde=True,color='g')\n",
+    "    plt.subplot(122)\n",
+    "    sns.histplot(data=df,x=col,kde=True,hue='gender')\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### observation\n",
+    "- female students tend to perform better than male students across all subjects"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,