diff --git "a/Notebooks/Diabetes Classification.ipynb" "b/Notebooks/Diabetes Classification.ipynb"
new file mode 100644
--- /dev/null
+++ "b/Notebooks/Diabetes Classification.ipynb"
@@ -0,0 +1,1611 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Install Dependencies and Download the Dataset"
+ ],
+ "metadata": {
+ "id": "CZK0VGVYnM7p"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "IZa-s_kXT5N7",
+ "outputId": "002d97b5-67a8-4305-9119-185bddd63262"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting opendatasets\n",
+ " Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from opendatasets) (4.66.2)\n",
+ "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (from opendatasets) (1.5.16)\n",
+ "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from opendatasets) (8.1.7)\n",
+ "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (1.16.0)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2024.2.2)\n",
+ "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2.8.2)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2.31.0)\n",
+ "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (8.0.4)\n",
+ "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2.0.7)\n",
+ "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (6.1.0)\n",
+ "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle->opendatasets) (0.5.1)\n",
+ "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle->opendatasets) (1.3)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle->opendatasets) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle->opendatasets) (3.6)\n",
+ "Installing collected packages: opendatasets\n",
+ "Successfully installed opendatasets-0.1.22\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install opendatasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import opendatasets as od\n",
+ "od.download('https://www.kaggle.com/datasets/akshaydattatraykhare/diabetes-dataset/download?datasetVersionNumber=1')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "tvr-DX6IUX8J",
+ "outputId": "19746d51-d575-4520-b604-099412f936ac"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Downloading diabetes-dataset.zip to ./diabetes-dataset\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 8.91k/8.91k [00:00<00:00, 9.66MB/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Data Preparation"
+ ],
+ "metadata": {
+ "id": "krk56CApnPfs"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "df = pd.read_csv('/content/diabetes-dataset/diabetes.csv')"
+ ],
+ "metadata": {
+ "id": "TQlFD1yNUcTf"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NkndZ_pWUtBz",
+ "outputId": "82950c5b-1e79-4f92-8f4a-e5bc5dd4bac7"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(768, 9)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 187
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "PM9EK3DgUnq1",
+ "outputId": "c18b38a6-d908-410a-cc8c-40039559d666"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
+ "0 6 148 72 35 0 33.6 \n",
+ "1 1 85 66 29 0 26.6 \n",
+ "2 8 183 64 0 0 23.3 \n",
+ "3 1 89 66 23 94 28.1 \n",
+ "4 0 137 40 35 168 43.1 \n",
+ "\n",
+ " DiabetesPedigreeFunction Age Outcome \n",
+ "0 0.627 50 1 \n",
+ "1 0.351 31 0 \n",
+ "2 0.672 32 1 \n",
+ "3 0.167 21 0 \n",
+ "4 2.288 33 1 "
+ ],
+ "text/html": [
+ "\n",
+ "<div>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>Pregnancies</th>\n",
+ "      <th>Glucose</th>\n",
+ "      <th>BloodPressure</th>\n",
+ "      <th>SkinThickness</th>\n",
+ "      <th>Insulin</th>\n",
+ "      <th>BMI</th>\n",
+ "      <th>DiabetesPedigreeFunction</th>\n",
+ "      <th>Age</th>\n",
+ "      <th>Outcome</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>6</td>\n",
+ "      <td>148</td>\n",
+ "      <td>72</td>\n",
+ "      <td>35</td>\n",
+ "      <td>0</td>\n",
+ "      <td>33.6</td>\n",
+ "      <td>0.627</td>\n",
+ "      <td>50</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>1</td>\n",
+ "      <td>85</td>\n",
+ "      <td>66</td>\n",
+ "      <td>29</td>\n",
+ "      <td>0</td>\n",
+ "      <td>26.6</td>\n",
+ "      <td>0.351</td>\n",
+ "      <td>31</td>\n",
+ "      <td>0</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>8</td>\n",
+ "      <td>183</td>\n",
+ "      <td>64</td>\n",
+ "      <td>0</td>\n",
+ "      <td>0</td>\n",
+ "      <td>23.3</td>\n",
+ "      <td>0.672</td>\n",
+ "      <td>32</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>1</td>\n",
+ "      <td>89</td>\n",
+ "      <td>66</td>\n",
+ "      <td>23</td>\n",
+ "      <td>94</td>\n",
+ "      <td>28.1</td>\n",
+ "      <td>0.167</td>\n",
+ "      <td>21</td>\n",
+ "      <td>0</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>0</td>\n",
+ "      <td>137</td>\n",
+ "      <td>40</td>\n",
+ "      <td>35</td>\n",
+ "      <td>168</td>\n",
+ "      <td>43.1</td>\n",
+ "      <td>2.288</td>\n",
+ "      <td>33</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 768,\n \"fields\": [\n {\n \"column\": \"Pregnancies\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n \"max\": 17,\n \"num_unique_values\": 17,\n \"samples\": [\n 6,\n 1,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Glucose\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31,\n \"min\": 0,\n \"max\": 199,\n \"num_unique_values\": 136,\n \"samples\": [\n 151,\n 101,\n 112\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BloodPressure\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19,\n \"min\": 0,\n \"max\": 122,\n \"num_unique_values\": 47,\n \"samples\": [\n 86,\n 46,\n 85\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"SkinThickness\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15,\n \"min\": 0,\n \"max\": 99,\n \"num_unique_values\": 51,\n \"samples\": [\n 7,\n 12,\n 48\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Insulin\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 115,\n \"min\": 0,\n \"max\": 846,\n \"num_unique_values\": 186,\n \"samples\": [\n 52,\n 41,\n 183\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BMI\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.884160320375446,\n \"min\": 0.0,\n \"max\": 67.1,\n \"num_unique_values\": 248,\n \"samples\": [\n 19.9,\n 31.0,\n 38.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"DiabetesPedigreeFunction\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3313285950127749,\n \"min\": 0.078,\n \"max\": 2.42,\n \"num_unique_values\": 517,\n \"samples\": [\n 1.731,\n 0.426,\n 0.138\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11,\n \"min\": 
21,\n \"max\": 81,\n \"num_unique_values\": 52,\n \"samples\": [\n 60,\n 47,\n 72\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Outcome\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 188
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Correlation of each feature column with the target column (Outcome)\n",
+ "df.corr()['Outcome']"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "_Y_vcqGeX9HT",
+ "outputId": "a97277ea-ccd6-42e7-9931-636877b38282"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Pregnancies 0.221898\n",
+ "Glucose 0.466581\n",
+ "BloodPressure 0.065068\n",
+ "SkinThickness 0.074752\n",
+ "Insulin 0.130548\n",
+ "BMI 0.292695\n",
+ "DiabetesPedigreeFunction 0.173844\n",
+ "Age 0.238356\n",
+ "Outcome 1.000000\n",
+ "Name: Outcome, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 189
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df = df.drop(columns=['Pregnancies', 'BloodPressure', 'SkinThickness', 'Insulin', 'DiabetesPedigreeFunction'])\n",
+ "df.columns"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "qKkFgLdvUr6T",
+ "outputId": "a6ce6b94-a9c4-48f6-f1eb-99f4a7932ee1"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['Glucose', 'BMI', 'Age', 'Outcome'], dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 190
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.isnull().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "LiscsDZsXRgs",
+ "outputId": "4fd0e722-45f2-459a-e4cb-0816c87db3c4"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Glucose 0\n",
+ "BMI 0\n",
+ "Age 0\n",
+ "Outcome 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 191
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "fig, axes = plt.subplots(3, figsize=(5, 5))\n",
+ "\n",
+ "sns.histplot(data=df['Glucose'], kde=True, ax=axes[0], color='blue').set(title='Glucose Histogram')\n",
+ "sns.histplot(data=df['BMI'], kde=True, ax=axes[1], color='grey').set(title='BMI Histogram')\n",
+ "sns.histplot(data=df['Age'], kde=True, ax=axes[2], color='black').set(title='Age Histogram')\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 506
+ },
+ "id": "TrQODYoyXVVD",
+ "outputId": "aa032378-fde8-4b46-d571-ba52d3016d03"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "