{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "### Data Installation" ], "metadata": { "id": "CZK0VGVYnM7p" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IZa-s_kXT5N7", "outputId": "002d97b5-67a8-4305-9119-185bddd63262" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting opendatasets\n", " Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from opendatasets) (4.66.2)\n", "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (from opendatasets) (1.5.16)\n", "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from opendatasets) (8.1.7)\n", "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (1.16.0)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2024.2.2)\n", "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2.8.2)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2.31.0)\n", "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (8.0.4)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (2.0.7)\n", "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle->opendatasets) (6.1.0)\n", "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from 
bleach->kaggle->opendatasets) (0.5.1)\n", "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle->opendatasets) (1.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle->opendatasets) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle->opendatasets) (3.6)\n", "Installing collected packages: opendatasets\n", "Successfully installed opendatasets-0.1.22\n" ] } ], "source": [ "%pip install opendatasets==0.1.22" ] }, { "cell_type": "code", "source": [ "import opendatasets as od\n", "od.download('https://www.kaggle.com/datasets/akshaydattatraykhare/diabetes-dataset/download?datasetVersionNumber=1')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tvr-DX6IUX8J", "outputId": "19746d51-d575-4520-b604-099412f936ac" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading diabetes-dataset.zip to ./diabetes-dataset\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 8.91k/8.91k [00:00<00:00, 9.66MB/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] } ] }, { "cell_type": "markdown", "source": [ "### Data Preparation" ], "metadata": { "id": "krk56CApnPfs" } }, { "cell_type": "code", "source": [ "import pandas as pd\n", "df = pd.read_csv('./diabetes-dataset/diabetes.csv')" ], "metadata": { "id": "TQlFD1yNUcTf" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NkndZ_pWUtBz", "outputId": "82950c5b-1e79-4f92-8f4a-e5bc5dd4bac7" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(768, 9)" ] }, "metadata": {}, 
"execution_count": 187 } ] }, { "cell_type": "code", "source": [ "df.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "PM9EK3DgUnq1", "outputId": "c18b38a6-d908-410a-cc8c-40039559d666" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age Outcome \n", "0 0.627 50 1 \n", "1 0.351 31 0 \n", "2 0.672 32 1 \n", "3 0.167 21 0 \n", "4 2.288 33 1 " ], "text/html": [ "\n", "
\n", " | Pregnancies | \n", "Glucose | \n", "BloodPressure | \n", "SkinThickness | \n", "Insulin | \n", "BMI | \n", "DiabetesPedigreeFunction | \n", "Age | \n", "Outcome | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "6 | \n", "148 | \n", "72 | \n", "35 | \n", "0 | \n", "33.6 | \n", "0.627 | \n", "50 | \n", "1 | \n", "
1 | \n", "1 | \n", "85 | \n", "66 | \n", "29 | \n", "0 | \n", "26.6 | \n", "0.351 | \n", "31 | \n", "0 | \n", "
2 | \n", "8 | \n", "183 | \n", "64 | \n", "0 | \n", "0 | \n", "23.3 | \n", "0.672 | \n", "32 | \n", "1 | \n", "
3 | \n", "1 | \n", "89 | \n", "66 | \n", "23 | \n", "94 | \n", "28.1 | \n", "0.167 | \n", "21 | \n", "0 | \n", "
4 | \n", "0 | \n", "137 | \n", "40 | \n", "35 | \n", "168 | \n", "43.1 | \n", "2.288 | \n", "33 | \n", "1 | \n", "