Upload 34 files
Browse files
- Data/cleaned_QA_data.csv +0 -0
- Data/cleaned_med_QA_data.csv +0 -0
- Notebooks/Couselling_from_Scratch.ipynb +0 -0
- Notebooks/Medication_from_Scratch.ipynb +1348 -0
- app.py +118 -0
- backend/__init__.py +0 -0
- backend/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/__pycache__/utils.cpython-312.pyc +0 -0
- backend/models/diabetes_model/random_forest_modelf.joblib +3 -0
- backend/models/diabetes_model/standard_scaler.joblib +3 -0
- backend/models/medication_classification_model/age_scaler.pkl +3 -0
- backend/models/medication_classification_model/knn_model.pkl +3 -0
- backend/models/medication_classification_model/label_encoders.pkl +3 -0
- backend/models/medication_classification_model/medication_encoder.pkl +3 -0
- backend/models/medication_info/config.json +39 -0
- backend/models/medication_info/generation_config.json +6 -0
- backend/models/medication_info/merges.txt +0 -0
- backend/models/medication_info/model.safetensors +3 -0
- backend/models/medication_info/special_tokens_map.json +24 -0
- backend/models/medication_info/tokenizer_config.json +22 -0
- backend/models/medication_info/training_args.bin +3 -0
- backend/models/medication_info/vocab.json +0 -0
- backend/models/mental_health_model/config.json +39 -0
- backend/models/mental_health_model/generation_config.json +6 -0
- backend/models/mental_health_model/merges.txt +0 -0
- backend/models/mental_health_model/model.safetensors +3 -0
- backend/models/mental_health_model/special_tokens_map.json +24 -0
- backend/models/mental_health_model/tokenizer_config.json +22 -0
- backend/models/mental_health_model/training_args.bin +3 -0
- backend/models/mental_health_model/vocab.json +0 -0
- backend/utils.py +125 -0
- frontend/index.html +80 -0
- frontend/script.js +87 -0
- frontend/styles.css +89 -0
Data/cleaned_QA_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
Data/cleaned_med_QA_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
Notebooks/Couselling_from_Scratch.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Notebooks/Medication_from_Scratch.ipynb
ADDED
@@ -0,0 +1,1348 @@
Colab notebook (Python 3 kernel, T4 GPU accelerator). Cells and key outputs:

### Data Preparation

!pip install opendatasets
# Output: Successfully installed opendatasets-0.1.22

import opendatasets as od
od.download('https://www.kaggle.com/datasets/hassaanidrees/medinfo?select=MedInfo2019-QA-Medications.xlsx')
# Output: Dataset URL: https://www.kaggle.com/datasets/hassaanidrees/medinfo
#         Downloading medinfo.zip to ./medinfo (159 kB)

# Import pandas for data analysis
import pandas as pd
df = pd.read_excel("/content/medinfo/MedInfo2019-QA-Medications.xlsx")
df = df[['Question', 'Answer']]

df.head()  # show first five rows
# Output (Question / Answer preview):
# 0  how does rivatigmine and otc sleep medicine in...  /  tell your doctor and pharmacist what prescript...
# 1  how does valium affect the brain                   /  Diazepam is a benzodiazepine that exerts anxio...
# 2  what is morphine                                   /  Morphine is a pain medication of the opiate fa...
# 3  what are the milligrams for oxycodone e            /  … 10 mg … 20 mg … 40 mg … 80 mg ...
# 4  81% aspirin contain resin and shellac in it. ?     /  Inactive Ingredients Ingredient Name
# [Colab interactive dataframe widget omitted]

df.Question[0]
# Output: 'how does rivatigmine and otc sleep medicine interact'

df.Answer[0]
# Output: "tell your doctor and pharmacist what prescription and nonprescription medications, vitamins,
# nutritional supplements, and herbal products you are taking or plan to take. Be sure to mention any of
# the following: antihistamines; aspirin and other nonsteroidal anti-inflammatory medications (NSAIDs) such
# as ibuprofen (Advil, Motrin) and naproxen (Aleve, Naprosyn); bethanechol (Duvoid, Urecholine); ipratropium
# (Atrovent, in Combivent, DuoNeb); and medications for Alzheimer's disease, glaucoma, irritable bowel
# disease, motion sickness, ulcers, or urinary problems. Your doctor may need to change the doses of your
# medications or monitor you carefully for side effects."

df.shape  # 690 rows | 2 cols
# Output: (690, 2)

!pip install cleantext
# Output: Successfully installed cleantext-1.1.4

import cleantext

# Function to clean text data by removing unwanted characters and formatting
def clean(textdata):
    cleaned_text = []
    for i in textdata:
        cleaned_text.append(cleantext.clean(str(i), extra_spaces=True, lowercase=True, stopwords=False, stemming=False, numbers=True, punct=True, clean_all=True))

    return cleaned_text
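A quick sanity check of the clean() helper can make the normalization visible before it is applied to the whole dataframe. The snippet below is illustrative and not a cell from the uploaded notebook; the exact output depends on the installed cleantext version and flag interactions, so no specific result is asserted.

# Hypothetical check of the clean() wrapper defined above (not in the notebook).
sample = ["What is Desonide ointment used for?", "Take 2 tablets, twice daily!"]
print(clean(sample))  # prints the normalized strings the model will actually be trained on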
# Apply the clean function to the questions and answers columns
df.Question = list(clean(df.Question))
df.Answer = list(clean(df.Answer))

# Save the cleaned data into a new CSV file
df.to_csv("cleaned_med_QA_data.csv", index=False)

### GPT-2 Model

!pip install datasets
# Output: Successfully installed datasets-3.0.0 dill-0.3.8 multiprocess-0.70.16 pyarrow-17.0.0 xxhash-3.5.0
# (pip warns that cudf-cu12 and ibis-framework pin an older pyarrow than the 17.0.0 just installed.)

from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
import torch
from datasets import load_dataset

# Load the GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
# (transformers emits a FutureWarning about the clean_up_tokenization_spaces default.)

# Set the padding token for the tokenizer to be the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token

# Maximum sequence length that GPT-2 can handle
max_length = tokenizer.model_max_length
print(max_length)
# Output: 1024

# Load the cleaned QA dataset as a training set using the 'datasets' library
dataset = load_dataset('csv', data_files={'train': 'cleaned_med_QA_data.csv'}, split='train')

# Function to tokenize questions and answers and prepare them for the model
def tokenize_function(examples):
    '''1. Combine each question and answer into a single input string
       2. Tokenize the combined text using the GPT-2 tokenizer
       3. Set the labels to be the same as the input_ids (the model shifts them internally to predict the next token)
       4. Return the tokenized output.'''

    combined_text = [str(q) + " " + str(a) for q, a in zip(examples['Question'], examples['Answer'])]
    tokenized_output = tokenizer(combined_text, padding='max_length', truncation=True, max_length=128)

    # Set the labels to be the same as the input_ids
    tokenized_output['labels'] = tokenized_output['input_ids'].copy()

    return tokenized_output

# Tokenize the entire dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)
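Before training, it can help to confirm what one tokenized example looks like: the question and answer are fused into a single 128-token sequence, and labels mirror input_ids so the Trainer computes a standard causal language-modeling loss. The inspection snippet below is a sketch, not part of the uploaded notebook.

# Hypothetical inspection cell (not in the notebook): look at one tokenized training example.
example = tokenized_dataset[0]
print(len(example['input_ids']))                             # 128 (padded/truncated length)
print(example['input_ids'][:10])                             # first few token ids of "question answer"
print(example['labels'][:10] == example['input_ids'][:10])   # True: labels copy input_ids
print(tokenizer.decode(example['input_ids'], skip_special_tokens=True)[:80])  # readable text back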
# Define training arguments for the GPT-2 model
training_args = TrainingArguments(
    output_dir='./results',          # Directory to save model outputs
    num_train_epochs=20,             # Train for 20 epochs
    per_device_train_batch_size=16,  # Batch size during training
    per_device_eval_batch_size=32,   # Batch size during evaluation
    warmup_steps=500,                # Warmup steps for the learning rate scheduler
    weight_decay=0.01,               # Weight decay for regularization
    logging_dir='./logs',            # Directory for saving logs
    logging_steps=10,                # Log every 10 steps
    save_steps=1000,                 # Save model checkpoints every 1000 steps
)

# Trainer class to handle the training process
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

# Train the model
trainer.train()
# Output: [880/880 08:45, Epoch 20/20]; training loss is logged every 10 steps and falls from 5.8918 at
# step 10 to 0.8650 at step 880 (step 100: 2.1501, 200: 1.6877, 300: 1.6574, 400: 1.5246, 500: 1.2876,
# 600: 1.1480, 700: 0.9985, 800: 0.8196).
# TrainOutput(global_step=880, training_loss=1.5622584277933294, metrics={'train_runtime': 525.9662,
# 'train_samples_per_second': 26.237, 'train_steps_per_second': 1.673, 'total_flos': 901457510400000.0,
# 'train_loss': 1.5622584277933294, 'epoch': 20.0})

# Save the model
trainer.save_model('med_info_model')
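Once trainer.save_model('med_info_model') has written the weights, the fine-tuned model can be reloaded outside the training session. A minimal sketch follows; it assumes the tokenizer files sit in the same directory (the Trainer was given the tokenizer, so recent transformers versions save them alongside the model; if not, call tokenizer.save_pretrained('med_info_model') first).

# Minimal reload sketch (assumption: 'med_info_model' contains both model and tokenizer files).
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

reloaded_tokenizer = GPT2Tokenizer.from_pretrained('med_info_model')
reloaded_model = GPT2LMHeadModel.from_pretrained('med_info_model')
reloaded_model.to('cuda' if torch.cuda.is_available() else 'cpu')
reloaded_model.eval()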
1240 |
+
{
|
1241 |
+
"cell_type": "markdown",
|
1242 |
+
"source": [
|
1243 |
+
"### Testing"
|
1244 |
+
],
|
1245 |
+
"metadata": {
|
1246 |
+
"id": "VhXRJT6jeTuz"
|
1247 |
+
}
|
1248 |
+
},
|
1249 |
+
{
|
1250 |
+
"cell_type": "code",
|
1251 |
+
"source": [
|
1252 |
+
"# Function to generate a response based on a user prompt (testing the model)\n",
|
1253 |
+
"def generate_response(prompt):\n",
|
1254 |
+
" inputs = tokenizer.encode(prompt, return_tensors=\"pt\").to('cuda')\n",
|
1255 |
+
" outputs = model.generate(inputs, max_length=150, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)\n",
|
1256 |
+
"\n",
|
1257 |
+
" # Decode the generated output\n",
|
1258 |
+
" response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
1259 |
+
"\n",
|
1260 |
+
" # Remove the prompt from the response\n",
|
1261 |
+
" if response.startswith(prompt):\n",
|
1262 |
+
" response = response[len(prompt):].strip() # Remove the prompt from the response\n",
|
1263 |
+
"\n",
|
1264 |
+
" return response"
|
1265 |
+
],
|
1266 |
+
"metadata": {
|
1267 |
+
"id": "JbMs8UuSu5_R"
|
1268 |
+
},
|
1269 |
+
"execution_count": null,
|
1270 |
+
"outputs": []
|
1271 |
+
},
|
{
"cell_type": "code",
"source": [
"# Example conversation\n",
"user_input = \"what is desonide ointment used for\"\n",
"bot_response = generate_response(user_input)\n",
"print(\"Bot Response:\", bot_response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "qsHAT1-uxC4_",
"outputId": "89b73c5f-0ae9-449d-8eb4-3df1a7c146bb"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Bot Response: desonide ointment is used to treat a variety of conditions it is used to treat allergies and other skin conditions it is also used to treat certain types of infections it is also used to treat skin infections caused by bacteria that are on skin desonide is in a class of medications called antimicrobials it works by killing bacteria that cause skin infections desonide is in a class of medications called antibiotics it works by killing bacteria that cause skin infections\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Copying the model to Google Drive (optional)\n",
"import shutil\n",
"\n",
"# Path to the file in Colab\n",
"colab_file_path = '/content/med_info_model/model.safetensors'\n",
"\n",
"# Path to your Google Drive\n",
"drive_file_path = '/content/drive/MyDrive'\n",
"\n",
"# Copy the file\n",
"shutil.copy(colab_file_path, drive_file_path)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "aP4IEboMxDWG",
"outputId": "c00d1d74-e389-4de4-a151-d20736b6bccd"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'/content/drive/MyDrive/model.safetensors'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "uKYwYe5XyXgx"
},
"execution_count": null,
"outputs": []
}
]
}
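
A note on the Testing cells above: generate_response decodes with plain greedy search (the model.generate defaults) capped at max_length=150, which is why the sample bot response loops over near-identical sentences. Below is a minimal alternative decoding sketch, assuming the notebook's model and tokenizer are in scope; the parameter values are illustrative rather than tuned, not something used in this commit.

# Hedged sketch: sampled decoding for the fine-tuned GPT-2, assuming the notebook's
# `model` and `tokenizer` objects. Blocking repeated n-grams and bounding only the
# newly generated tokens usually reduces the looping seen in the greedy output above.
def generate_response_sampled(prompt):
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        inputs,
        max_new_tokens=120,          # bound the reply length instead of prompt + reply
        do_sample=True,
        top_p=0.92,
        temperature=0.8,
        no_repeat_ngram_size=3,      # block verbatim 3-gram repeats
        pad_token_id=tokenizer.eos_token_id,
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text[len(prompt):].strip() if text.startswith(prompt) else text
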
app.py
ADDED
@@ -0,0 +1,118 @@
from flask import Flask, jsonify, request, send_from_directory
from backend.utils import (
    generate_counseling_response,
    generate_medication_response,
    classify_diabetes,
    classify_medicine,
    get_llama_response
)
import os

app = Flask(__name__, static_folder='frontend', template_folder='frontend')

# Serve the main HTML file for the frontend
@app.route('/')
def index():
    return send_from_directory(app.static_folder, 'index.html')


# Serve the CSS files
@app.route('/styles.css')
def styles():
    return send_from_directory(app.static_folder, 'styles.css')


# Serve the JavaScript files
@app.route('/script.js')
def script():
    return send_from_directory(app.static_folder, 'script.js')


# Route for Counseling Model
@app.route('/api/counseling', methods=['POST'])
def counseling():
    data = request.json
    question = data.get('question')
    if not question:
        return jsonify({"error": "Question is required."}), 400

    response = generate_counseling_response(question)
    return jsonify({"response": response})


# Route for Medication Info Model
@app.route('/api/medication', methods=['POST'])
def medication():
    data = request.json
    question = data.get('question')
    if not question:
        return jsonify({"error": "Question is required."}), 400

    response = generate_medication_response(question)
    return jsonify({"response": response})


# Route for Diabetes Classification
@app.route('/api/diabetes_classification', methods=['POST'])
def diabetes_classification():
    data = request.json

    # Extract input features
    glucose = data.get('glucose')
    bmi = data.get('bmi')
    age = data.get('age')

    # Validate input data
    if glucose is None or bmi is None or age is None:
        return jsonify({"error": "Please provide glucose, bmi, and age."}), 400

    result = classify_diabetes(glucose, bmi, age)
    return jsonify({"result": result})


# Route for Medicine Classification
@app.route('/api/medicine_classification', methods=['POST'])
def medicine_classification():
    data = request.json

    # Extract input features
    age = data.get('age')
    gender = data.get('gender')
    blood_type = data.get('blood_type')
    medical_condition = data.get('medical_condition')
    test_results = data.get('test_results')

    # Validate input data
    if not (age and gender and blood_type and medical_condition and test_results):
        return jsonify({"error": "Please provide Age, Gender, Blood Type, Medical Condition, and Test Results."}), 400

    # Prepare the new data as a DataFrame
    new_data = {
        'Age': [int(age)],
        'Gender': [gender],
        'Blood Type': [blood_type],
        'Medical Condition': [medical_condition],
        'Test Results': [test_results]
    }

    # Call the classification function
    medicine = classify_medicine(new_data)
    return jsonify({"medicine": medicine[0]})


# Route for General Chat (Llama 3.1 API using Groq Cloud)
@app.route('/api/general', methods=['POST'])
def general_chat():
    data = request.json
    question = data.get('question')
    if not question:
        return jsonify({"error": "Question is required."}), 400

    # Get formatted response from LLaMA 3.1 hosted on Groq Cloud
    llama_response = get_llama_response(question)
    return jsonify({"response": llama_response})


if __name__ == '__main__':
    app.run(debug=True)
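
A quick way to exercise these routes once the server is running, assuming Flask's default development address http://127.0.0.1:5000 and the requests library; the payload values are illustrative, not taken from the repo.

# Hedged sketch: calling the API routes defined in app.py from a separate Python process.
import requests

BASE = "http://127.0.0.1:5000"  # assumed: Flask dev server on its default port

# Medication info chat
r = requests.post(f"{BASE}/api/medication", json={"question": "what is desonide ointment used for"})
print(r.json()["response"])

# Diabetes classification
r = requests.post(f"{BASE}/api/diabetes_classification", json={"glucose": 140, "bmi": 31.2, "age": 45})
print(r.json()["result"])

# Medicine classification
r = requests.post(f"{BASE}/api/medicine_classification", json={
    "age": 45, "gender": "Male", "blood_type": "O+",
    "medical_condition": "Diabetes", "test_results": "Abnormal",
})
print(r.json()["medicine"])
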
backend/__init__.py
ADDED
File without changes

backend/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (139 Bytes)

backend/__pycache__/utils.cpython-312.pyc
ADDED
Binary file (5.4 kB)

backend/models/diabetes_model/random_forest_modelf.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a85f94f5f6d3042ac7b03513b38ff6472eb3bbdaa5d9d218734398f86f32a2b0
size 2412153

backend/models/diabetes_model/standard_scaler.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06c474ca12ad0be6d4364a5c3e791799deae88319690b39853b21321472e9483
size 671

backend/models/medication_classification_model/age_scaler.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:11e23987d039e63910799d5b655f1854e7a771f976aa9616e283bc560cf8a05a
size 927

backend/models/medication_classification_model/knn_model.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1f2d56e390c7f035c379fac579590e938b8d6559d294c86107981923d3c1a45
size 4493526

backend/models/medication_classification_model/label_encoders.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:054ccfdced7e079ad079278f88e4694501e734aa63c2b7b2704c76eda9157a89
size 1576

backend/models/medication_classification_model/medication_encoder.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:67594b193e6f4a44315776b4694012fd38e5ec157ea649091d3758135c3b2dfb
size 597
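
The classifier artifacts above are stored as Git LFS pointers. A small optional check, not part of this commit, that a checkout actually resolved them to binaries rather than pointer stubs; the expected sizes come from the pointer files listed here.

# Hedged sketch: verify the Git LFS artifacts resolved (a clone without git-lfs leaves small pointer files).
import os

expected = {
    "backend/models/diabetes_model/random_forest_modelf.joblib": 2412153,
    "backend/models/diabetes_model/standard_scaler.joblib": 671,
    "backend/models/medication_classification_model/age_scaler.pkl": 927,
    "backend/models/medication_classification_model/knn_model.pkl": 4493526,
    "backend/models/medication_classification_model/label_encoders.pkl": 1576,
    "backend/models/medication_classification_model/medication_encoder.pkl": 597,
}
for path, size in expected.items():
    actual = os.path.getsize(path) if os.path.exists(path) else 0
    status = "ok" if actual == size else "missing or pointer only - run `git lfs pull`"
    print(f"{path}: {status}")
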
backend/models/medication_info/config.json
ADDED
@@ -0,0 +1,39 @@
{
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "use_cache": true,
  "vocab_size": 50257
}
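
Since this is a stock GPT-2 configuration, the exported directory can be reloaded directly with transformers, which is what backend/utils.py does; the sketch below shows just the minimal call, with the path taken relative to the repository root.

# Hedged sketch: reload the fine-tuned medication-info model from this directory.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model_dir = "backend/models/medication_info"  # repo-relative path; adjust to your checkout
model = GPT2LMHeadModel.from_pretrained(model_dir)
tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
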
backend/models/medication_info/generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "transformers_version": "4.44.2"
}

backend/models/medication_info/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff

backend/models/medication_info/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:796a9391740bb9884e37b9f11b4c0d9f57c06941f057fa6345536d13c771e810
size 497774208

backend/models/medication_info/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<|endoftext|>",
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}

backend/models/medication_info/tokenizer_config.json
ADDED
@@ -0,0 +1,22 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "50256": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 1024,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}

backend/models/medication_info/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c3765ef35134499414f1f0ec4f0439ae47cdf23380f5535f5092007be173d31c
size 5112
backend/models/medication_info/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
backend/models/mental_health_model/config.json
ADDED
@@ -0,0 +1,39 @@
{
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "use_cache": true,
  "vocab_size": 50257
}

backend/models/mental_health_model/generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "transformers_version": "4.44.2"
}

backend/models/mental_health_model/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff

backend/models/mental_health_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eea02c798a4efdadb3ecf163a411a12f393d9ac30c9f5019348a65a666dabdbc
size 497774208

backend/models/mental_health_model/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<|endoftext|>",
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}

backend/models/mental_health_model/tokenizer_config.json
ADDED
@@ -0,0 +1,22 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "50256": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 1024,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}

backend/models/mental_health_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9fe4d3b4fb9feb46fdfc5a116e608dc00a98241105ed35a3cc8d220ee6e20886
size 5112
backend/models/mental_health_model/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
backend/utils.py
ADDED
@@ -0,0 +1,125 @@
# backend/utils.py
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from langchain_groq import ChatGroq
import torch
import requests
import joblib
import pandas as pd

# Load the trained model and tokenizer : Counselling
counseling_model = GPT2LMHeadModel.from_pretrained('backend\\models\\mental_health_model')
counselling_tokenizer = GPT2Tokenizer.from_pretrained('backend\\models\\mental_health_model')

# Load the trained model and tokenizer : Medication
medication_model = GPT2LMHeadModel.from_pretrained('backend\\models\\medication_info')
medication_tokenizer = GPT2Tokenizer.from_pretrained('backend\\models\\medication_info')

# Load the trained Random Forest model and StandardScaler
diabetes_model = joblib.load('backend\\models\\diabetes_model\\random_forest_modelf.joblib')
diabetes_scaler = joblib.load('backend\\models\\diabetes_model\\standard_scaler.joblib')

# Load the model, encoders, and scaler
knn = joblib.load('backend\\models\\medication_classification_model\\knn_model.pkl')
label_encoders = joblib.load('backend\\models\\medication_classification_model\\label_encoders.pkl')
age_scaler = joblib.load('backend\\models\\medication_classification_model\\age_scaler.pkl')
medication_encoder = joblib.load('backend\\models\\medication_classification_model\\medication_encoder.pkl')


# Diabetes Classifier
def classify_diabetes(glucose, bmi, age):
    # Normalize the input features
    input_features = [[glucose, bmi, age]]
    input_features_norm = diabetes_scaler.transform(input_features)

    # Make predictions
    prediction = diabetes_model.predict(input_features_norm)[0]
    prediction_probability = diabetes_model.predict_proba(input_features_norm)[0] * 100

    diabetic_probability = prediction_probability[prediction].item()

    if prediction == 0:
        result = "Non Diabetic"
    else:
        result = "Diabetic"

    # Format the output as: "Non Diabetic | 72%"
    formatted_result = f"{result} | {diabetic_probability:.1f}%"
    return formatted_result


# Medicine Classifier
def classify_medicine(new_data):
    # Convert dictionary to DataFrame
    new_data_df = pd.DataFrame(new_data)

    # Encode the new data using the saved label encoders
    for column in ['Gender', 'Blood Type', 'Medical Condition', 'Test Results']:
        new_data_df[column] = label_encoders[column].transform(new_data_df[column])

    # Normalize the 'Age' column in the new data
    new_data_df['Age'] = age_scaler.transform(new_data_df[['Age']])

    # Make predictions
    predictions = knn.predict(new_data_df)

    # Decode the predictions back to the original medication names
    predicted_medications = medication_encoder.inverse_transform(predictions)

    return predicted_medications


# Generate Counseling Response
def generate_counseling_response(prompt):
    inputs = counselling_tokenizer.encode(prompt, return_tensors="pt")
    outputs = counseling_model.generate(inputs, max_length=150, num_return_sequences=1, pad_token_id=counselling_tokenizer.eos_token_id)

    # Decode the generated output
    response = counselling_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Remove the prompt from the response
    if response.startswith(prompt):
        response = response[len(prompt):].strip()

    return response


# Generate Medication Response
def generate_medication_response(prompt):
    inputs = medication_tokenizer.encode(prompt, return_tensors="pt")
    outputs = medication_model.generate(inputs, max_length=150, num_return_sequences=1, pad_token_id=medication_tokenizer.eos_token_id)

    # Decode the generated output
    response = medication_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Remove the prompt from the response
    if response.startswith(prompt):
        response = response[len(prompt):].strip()

    return response


# Llama 3.1 Integration as a General Tab
llm = ChatGroq(
    temperature=0,
    groq_api_key='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',
    model_name="llama-3.1-70b-versatile"
)

def get_llama_response(prompt):
    try:
        response = llm.invoke(prompt)
        formatted_response = format_response(response.content)
        return formatted_response
    except Exception as e:
        return f"Error: {str(e)}"

def format_response(response):
    # Add line breaks and make it easier to read
    response = response.replace("**", "").replace("*", "").replace(" ", "\n").strip()
    lines = response.split("\n")
    formatted_response = ""
    for line in lines:
        formatted_response += f"<p>{line}</p>"
    return formatted_response
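
Two small usage notes on this module, both sketches rather than code shipped in the commit: the Groq key hard-coded above can be read from an environment variable instead (the name GROQ_API_KEY is an assumption), and the two classifiers can be smoke-tested locally with illustrative inputs drawn from the options the frontend offers.

# Hedged sketch: environment-based key and a quick local check of the classifiers.
import os
from langchain_groq import ChatGroq
from backend.utils import classify_diabetes, classify_medicine

# Assumed variable name; set it before starting the app, e.g.  export GROQ_API_KEY=...
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.1-70b-versatile",
)

# Illustrative inputs only; the categorical values match the dropdowns in frontend/index.html.
print(classify_diabetes(140, 31.2, 45))   # returns a string in the "Diabetic | 78.0%" format
print(classify_medicine({
    'Age': [45], 'Gender': ['Male'], 'Blood Type': ['O+'],
    'Medical Condition': ['Diabetes'], 'Test Results': ['Abnormal'],
}))
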
frontend/index.html
ADDED
@@ -0,0 +1,80 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AHA</title>
    <link rel="stylesheet" href="/styles.css">
</head>
<body>
    <div class="container">
        <h1>AI Health Assistant</h1>
        <div class="tabs">
            <button class="tab-button" onclick="showTab('counseling')">Counseling Chat</button>
            <button class="tab-button" onclick="showTab('medication')">Medication Chat</button>
            <button class="tab-button" onclick="showTab('general')">General Chat</button>
            <button class="tab-button" onclick="showTab('diabetes')">Diabetes Classification</button>
            <button class="tab-button" onclick="showTab('medicine')">Medicine Classification</button>
        </div>
        <div id="counseling" class="tab-content">
            <textarea id="counseling-question" placeholder="Ask your health problem here..."></textarea>
            <button onclick="submitCounseling()">Generate</button>
            <p id="counseling-response"></p>
        </div>
        <div id="medication" class="tab-content">
            <textarea id="medication-question" placeholder="Ask your medicine here..."></textarea>
            <button onclick="submitMedication()">Generate</button>
            <p id="medication-response"></p>
        </div>
        <div id="diabetes" class="tab-content">
            <input type="number" id="glucose" placeholder="Glucose Level" required>
            <input type="number" id="bmi" placeholder="BMI" required>
            <input type="number" id="age-diabetes" placeholder="Age" required>
            <button onclick="submitDiabetes()">Submit</button>
            <p id="diabetes-response"></p>
        </div>
        <div id="medicine" class="tab-content">
            <input type="number" id="age" placeholder="Age" required>
            <select id="gender" required>
                <option value="" disabled selected>Gender</option>
                <option value="Male">Male</option>
                <option value="Female">Female</option>
            </select>
            <select id="blood-type" required>
                <option value="" disabled selected>Blood Group</option>
                <option value="A+">A+</option>
                <option value="A-">A-</option>
                <option value="B+">B+</option>
                <option value="B-">B-</option>
                <option value="AB+">AB+</option>
                <option value="AB-">AB-</option>
                <option value="O+">O+</option>
                <option value="O-">O-</option>
            </select>
            <select id="medical-condition" required>
                <option value="" disabled selected>Medical Condition</option>
                <option value="Cancer">Cancer</option>
                <option value="Arthritis">Arthritis</option>
                <option value="Diabetes">Diabetes</option>
                <option value="Hypertension">Hypertension</option>
                <option value="Obesity">Obesity</option>
                <option value="Asthma">Asthma</option>
            </select>
            <select id="test-results" required>
                <option value="" disabled selected>Test Results</option>
                <option value="Normal">Normal</option>
                <option value="Abnormal">Abnormal</option>
                <option value="Inconclusive">Inconclusive</option>
            </select>
            <button onclick="submitMedicine()">Submit</button>
            <p id="medicine-response"></p>
        </div>
        <div id="general" class="tab-content">
            <textarea id="general-question" placeholder="Ask your question here..."></textarea>
            <button onclick="submitGeneral()">Generate</button>
            <p id="general-response"></p>
        </div>
    </div>
    <script src="/script.js"></script>
</body>
</html>
frontend/script.js
ADDED
@@ -0,0 +1,87 @@
function showTab(tabId) {
    const tabs = document.querySelectorAll('.tab-content');
    tabs.forEach(tab => {
        if (tab.id === tabId) {
            tab.classList.add('active');
        } else {
            tab.classList.remove('active');
        }
    });
}

function submitCounseling() {
    const question = document.getElementById('counseling-question').value;
    fetch('/api/counseling', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ question })
    })
    .then(response => response.json())
    .then(data => {
        document.getElementById('counseling-response').innerText = data.response;
    })
    .catch(error => console.error('Error:', error));
}

function submitMedication() {
    const question = document.getElementById('medication-question').value;
    fetch('/api/medication', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ question })
    })
    .then(response => response.json())
    .then(data => {
        document.getElementById('medication-response').innerText = data.response;
    })
    .catch(error => console.error('Error:', error));
}

function submitDiabetes() {
    const glucose = document.getElementById('glucose').value;
    const bmi = document.getElementById('bmi').value;
    const age = document.getElementById('age-diabetes').value;
    fetch('/api/diabetes_classification', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ glucose, bmi, age })
    })
    .then(response => response.json())
    .then(data => {
        document.getElementById('diabetes-response').innerText = data.result;
    })
    .catch(error => console.error('Error:', error));
}

function submitMedicine() {
    const age = document.getElementById('age').value;
    const gender = document.getElementById('gender').value;
    const bloodType = document.getElementById('blood-type').value;
    const medicalCondition = document.getElementById('medical-condition').value;
    const testResults = document.getElementById('test-results').value;

    fetch('/api/medicine_classification', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ age, gender, blood_type: bloodType, medical_condition: medicalCondition, test_results: testResults })
    })
    .then(response => response.json())
    .then(data => {
        document.getElementById('medicine-response').innerText = data.medicine;
    })
    .catch(error => console.error('Error:', error));
}

function submitGeneral() {
    const question = document.getElementById('general-question').value;
    fetch('/api/general', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ question })
    })
    .then(response => response.json())
    .then(data => {
        document.getElementById('general-response').innerText = data.response;
    })
    .catch(error => console.error('Error:', error));
}
frontend/styles.css
ADDED
@@ -0,0 +1,89 @@
body {
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 0;
    background: #121212; /* Dark background for body */
    color: #e0e0e0; /* Light text color */
}

.container {
    width: 80%;
    margin: 0 auto;
    padding: 20px;
}

h1 {
    text-align: center;
    color: #ffffff; /* Light color for headers */
}

.tabs {
    display: flex;
    justify-content: space-around;
    margin-bottom: 20px;
}

.tab-button {
    padding: 10px 20px;
    background: #054c66; /* Dark background for tab buttons */
    color: #ffffff; /* Light text color */
    border: none;
    cursor: pointer;
    border-radius: 5px;
    transition: background 0.3s;
}

.tab-button:hover {
    background: #226f90; /* Slightly lighter background on hover */
}

.tab-content {
    display: none;
    animation: fadeIn 0.5s;
}

.tab-content.active {
    display: block;
}

textarea, input, select {
    display: block;
    width: 100%;
    padding: 10px;
    margin-bottom: 10px;
    border: 1px solid #444; /* Darker border color */
    border-radius: 5px;
    background: #1e1e1e; /* Dark background for inputs */
    color: #e0e0e0; /* Light text color */
    transition: border-color 0.3s;
}

textarea:focus, input:focus, select:focus {
    border-color: #00b3ff; /* Highlight border on focus */
}

button {
    padding: 10px 20px;
    background: #0b979e; /* Dark teal background */
    color: #fff;
    border: none;
    cursor: pointer;
    border-radius: 5px;
    transition: background 0.3s;
}

button:hover {
    background: #417a7d; /* Muted darker teal on hover */
}

p {
    font-size: 1.2em;
    color: #e0e0e0; /* Light text color for paragraphs */
    margin-top: 10px;
}

/* Keyframes for fade-in animation */
@keyframes fadeIn {
    from { opacity: 0; }
    to { opacity: 1; }
}
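
The commit ships no requirements file or launcher beyond app.py, so the following is an inferred sketch of running the demo locally: the package list comes from the imports in app.py and backend/utils.py, and the host/port are Flask defaults, not values specified anywhere in the repo.

# Inferred setup, not shipped with this commit:
#   pip install flask transformers torch scikit-learn joblib pandas langchain-groq requests
#   git lfs pull        # resolve the model artifacts listed above
#
# Then either run `python app.py`, or launch programmatically from the repo root:
from app import app

if __name__ == "__main__":
    app.run(host="127.0.0.1", port=5000, debug=True)
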