{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 11742,
     "status": "ok",
     "timestamp": 1730569728288,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "5v8KnAaD-z9t",
    "outputId": "e58efe1c-a3de-4271-8e43-ecb8168a35de"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n",
      "Collecting datasets\n",
      "  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n",
      "Collecting seqeval\n",
      "  Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.24.7)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n",
      "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n",
      "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n",
      "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.5)\n",
      "Collecting pyarrow>=15.0.0 (from datasets)\n",
      "  Downloading pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n",
      "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n",
      "  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
      "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.1.4)\n",
      "Collecting xxhash (from datasets)\n",
      "  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
      "Collecting multiprocess<0.70.17 (from datasets)\n",
      "  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
      "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.6.1)\n",
      "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.5)\n",
      "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.5.2)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.12.2)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.11.1)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.3)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.8.30)\n",
      "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.13.1)\n",
      "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n",
      "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.1)\n",
      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
      "Downloading datasets-3.1.0-py3-none-any.whl (480 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m31.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.0 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.0/40.0 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hBuilding wheels for collected packages: seqeval\n",
      "  Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=0d4c765f01269491e0baddd39f053312bc7aa62fa5ead5b3e0dcb0bea5f36694\n",
      "  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n",
      "Successfully built seqeval\n",
      "Installing collected packages: xxhash, pyarrow, dill, multiprocess, seqeval, datasets\n",
      "  Attempting uninstall: pyarrow\n",
      "    Found existing installation: pyarrow 14.0.2\n",
      "    Uninstalling pyarrow-14.0.2:\n",
      "      Successfully uninstalled pyarrow-14.0.2\n",
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 18.0.0 which is incompatible.\n",
      "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 18.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0mSuccessfully installed datasets-3.1.0 dill-0.3.8 multiprocess-0.70.16 pyarrow-18.0.0 seqeval-1.2.2 xxhash-3.5.0\n"
     ]
    }
   ],
   "source": [
    "!pip install transformers datasets seqeval huggingface_hub\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "amREIFSH-z7r"
   },
   "outputs": [],
   "source": [
    "# Standard library imports\n",
    "import os                 # Provides functions for interacting with the operating system\n",
    "import warnings           # Used to handle or suppress warnings\n",
    "import numpy as np        # Essential for numerical operations and array manipulation\n",
    "import torch              # PyTorch library for tensor computations and model handling\n",
    "import ast                # Used for safe evaluation of strings to Python objects (e.g., parsing tokens)\n",
    "\n",
    "# Hugging Face and Transformers imports\n",
    "from datasets import load_dataset                     # Loads datasets for model training and evaluation\n",
    "from transformers import (\n",
    "    AutoTokenizer,                                   # Initializes a tokenizer from a pre-trained model\n",
    "    DataCollatorForTokenClassification,              # Handles padding and formatting of token classification data\n",
    "    TrainingArguments,                               # Defines training parameters like batch size and learning rate\n",
    "    Trainer,                                         # High-level API for managing training and evaluation\n",
    "    AutoModelForTokenClassification,                 # Loads a pre-trained model for token classification tasks\n",
    "    get_linear_schedule_with_warmup,                 # Learning rate scheduler for gradual warm-up and linear decay\n",
    "    EarlyStoppingCallback                           # Callback to stop training if validation performance plateaus\n",
    ")\n",
    "\n",
    "# Hugging Face Hub\n",
    "from huggingface_hub import login                   # Allows logging in to Hugging Face Hub to upload models\n",
    "\n",
    "# seqeval metrics for NER evaluation\n",
    "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n",
    "# Provides precision, recall, F1-score, and classification report for evaluating NER model performance\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6,
     "status": "ok",
     "timestamp": 1730569738838,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "K7adlboI-z4p",
    "outputId": "a4306ab9-b7d5-4108-e3f6-1edc4899694f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
      "Token is valid (permission: fineGrained).\n",
      "Your token has been saved to /root/.cache/huggingface/token\n",
      "Login successful\n"
     ]
    }
   ],
   "source": [
    "# Log in to Hugging Face Hub\n",
    "login(token=\"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\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Qccgsjfs-zzA"
   },
   "outputs": [],
   "source": [
    "# Disable WandB (Weights & Biases) logging to avoid unwanted log outputs during training\n",
    "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
    "\n",
    "# Suppress warning messages to keep output clean, especially during training and evaluation\n",
    "warnings.filterwarnings(\"ignore\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 576,
     "referenced_widgets": [
      "7de65e247932425882a27257bf77b913",
      "228bb22c294f4fd1a08910c080586e65",
      "1b3d4b4f685f47f5b4cd1a83e76330c0",
      "3d96ee9792cf4ba5b57b7858678985bb",
      "84a7d8ad32da45c792d570a918f136d6",
      "c8b59cd42f04425e8de6f832acff43a4",
      "5546cbf48870406ca3e93e6ebed6487b",
      "f553dfdc8cf441a6990a8dc2126e55e5",
      "dcfd93ce24474f639418c7f2b56ce101",
      "7a14fd2365fa40ac92e8851019639a46",
      "804273498d974d388585d2f8769db2cb",
      "a0d5b69380ab4cccbe9fa9555e733d29",
      "a5f12e46915c4c7dbd92a8e6866a2136",
      "9d2dd1835a6d436abdda27377c39951d",
      "bde988b2464c49e38d9d660124e1a1ca",
      "6786dbb578764ecbb07549f99284e872",
      "1a4a5ae499224227b594f7e44ec05626",
      "d71e12cac7384779b7cf6deccdec6205",
      "6dd8528c7a8c4cf59a8be94ea2eec2ac",
      "e53b7766d65f4f31a000d433fe1718d8",
      "ee5a83f45127475496dbb8347c2e3b9e",
      "998183d1e43d4b58b6d49796dd6c65d2",
      "e2baeb32f3bb47aeab3d6ebf3a5e8876",
      "6d2ed7f8c8e54d32b94a410de93bebb4",
      "b42331f0c0ca417ea779764f7c7b91a6",
      "77528b7a72024e3ba2c03f495188b091",
      "4d770190eebf44e59e4c85598c48bb5e",
      "f1247d8b523e4d03bf802ea761090255",
      "dc951346b3d742d7861929d4d4bd69db",
      "2e185b6e38e348919accedcf54fe32ec",
      "daa89fa3059144ceaf95c8d8d209c59d",
      "371faab019ee49d78fa2547a65fc68b9",
      "30f20a8152e3450cb5c0c5300f2a677b",
      "fba60dcd62c04b53acba3c371d926149",
      "844335f761134693ae7aff60803dc62a",
      "0835d840cd7a4444b477187796df6ab1",
      "0b430c3c4ff641a38e4a6603b30dfdf0",
      "e68c8857489c4b5e83c52b12d32b71e6",
      "9a688924cb2b4a9c984eddee5d6e57d7",
      "a2681b1b407e45dbb2277685ad082fd3",
      "c324af888b614caa9cead361c9decafe",
      "542aeebec2144a79b36baa0fdfb4bc32",
      "975acf8253a3411293d961167054e7f0",
      "1c184b2b6c164ac4965182b4dd760faa"
     ]
    },
    "executionInfo": {
     "elapsed": 23970,
     "status": "ok",
     "timestamp": 1730569762804,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "fQ6ttUM8-zwM",
    "outputId": "a7edc99a-0064-4378-ab18-c31f29c9b727"
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7de65e247932425882a27257bf77b913",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "README.md:   0%|          | 0.00/2.87k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a0d5b69380ab4cccbe9fa9555e733d29",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00000-of-00001.parquet:   0%|          | 0.00/13.6M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e2baeb32f3bb47aeab3d6ebf3a5e8876",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split:   0%|          | 0/99545 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatasetDict({\n",
      "    train: Dataset({\n",
      "        features: ['index', 'tokens', 'ner_tags'],\n",
      "        num_rows: 99545\n",
      "    })\n",
      "})\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fba60dcd62c04b53acba3c371d926149",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/99545 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Skipping malformed example: 7171f30e-fa1e-49ec-975e-16c88c9b95e9 due to error: malformed node or string: None\n",
      "Skipping malformed example: 91dfd97b-2997-4080-8054-00cadec14dfc due to error: malformed node or string: None\n",
      "Skipping malformed example: cfb8beb4-ae7a-4185-9a54-08b0e85d03d3 due to error: malformed node or string: None\n",
      "Skipping malformed example: 5f0a2991-38b3-435b-9059-a05382e89a62 due to error: malformed node or string: None\n",
      "Skipping malformed example: 9d705fde-ce09-4bef-9f4a-9ad1fa452cc9 due to error: malformed node or string: None\n",
      "Skipping malformed example: 182457fb-c648-4fca-a207-af5a00072d4a due to error: malformed node or string: None\n",
      "Skipping malformed example: d9205ccd-c692-4cf1-8310-181de8f4cdc8 due to error: malformed node or string: None\n",
      "Skipping malformed example: dac55265-38cd-4c4b-9e56-a48a77e108d4 due to error: malformed node or string: None\n",
      "Skipping malformed example: f3d38b45-0035-45ab-b0aa-79ae7c63ba7a due to error: malformed node or string: None\n",
      "Skipping malformed example: 5ed32762-bf5b-4db4-9dbd-07cd5c0541dc due to error: malformed node or string: None\n",
      "Skipping malformed example: 426fc958-8c6b-41d8-acfe-2082a6be6ada due to error: malformed node or string: None\n",
      "Skipping malformed example: 4b5aa52d-cd5e-43ee-ac4f-7a8da00860e1 due to error: malformed node or string: None\n",
      "Skipping malformed example: 53b1ce49-1f71-4770-a344-bf1d804fefd4 due to error: malformed node or string: None\n",
      "Skipping malformed example: 03e9e957-da8f-45dc-84d0-e556bfd023b3 due to error: malformed node or string: None\n",
      "Skipping malformed example: b7e12634-f7be-42cb-8e76-837af2f2d877 due to error: malformed node or string: None\n",
      "Skipping malformed example: 0c77b0ac-b1cf-4730-ae3d-d7c59221f181 due to error: malformed node or string: None\n",
      "Skipping malformed example: b4623202-dfcb-4fa8-9d28-5af818111de2 due to error: malformed node or string: None\n"
     ]
    }
   ],
   "source": [
    "# Load the Azerbaijani NER dataset from Hugging Face\n",
    "dataset = load_dataset(\"LocalDoc/azerbaijani-ner-dataset\")\n",
    "print(dataset)  # Display dataset structure (e.g., train/validation splits)\n",
    "\n",
    "# Preprocessing function to format tokens and NER tags correctly\n",
    "def preprocess_example(example):\n",
    "    try:\n",
    "        # Convert string of tokens to a list and parse NER tags to integers\n",
    "        example[\"tokens\"] = ast.literal_eval(example[\"tokens\"])\n",
    "        example[\"ner_tags\"] = list(map(int, ast.literal_eval(example[\"ner_tags\"])))\n",
    "    except (ValueError, SyntaxError) as e:\n",
    "        # Skip and log malformed examples, ensuring error resilience\n",
    "        print(f\"Skipping malformed example: {example['index']} due to error: {e}\")\n",
    "        example[\"tokens\"] = []\n",
    "        example[\"ner_tags\"] = []\n",
    "    return example\n",
    "\n",
    "# Apply preprocessing to each dataset entry, ensuring consistent formatting\n",
    "dataset = dataset.map(preprocess_example)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 177,
     "referenced_widgets": [
      "f891b1b37df947eeae060bb5daefdea9",
      "470d93420bfc4d23bca07c308f37c316",
      "8e2fd00ac8414b2c92f17d565abacb91",
      "747e1fd9be3240169325520985f51ab0",
      "6f3022c618f548e0b21d8b031fabdfc8",
      "0cf2e757689b44ff8dd5b099d4deec99",
      "e378c4f4d73f42469868b21c0b5168db",
      "048f42e41b8e45a0847370c4ecec5c75",
      "c9dee5569d5340a2a7cbf59eb6a3cd1f",
      "8d8255757c434c74b955ee6ee6118282",
      "de1715ae4aa04626a4a2243fc7274908",
      "642afa2917004372804f7069dfde13b6",
      "a6181461c40040548b4afe1cc25d3eb9",
      "fd779f16d24a4cf481b32ed6d7d386b4",
      "ce2f73c695c84899864bd8b90ce61acf",
      "91d6176ee34e43b6bdc589651a84bb39",
      "784e878215004b05a056ca81b603e225",
      "e5d4bdd8daa5443a9fb56198088232bb",
      "1e8546990a404bffa6cacc8ebcc455be",
      "b32ad591846a4b2bbdbd5c5294dfa1e3",
      "014ac61faabf4dacbf8329d9f22a7d74",
      "c0753a29729549cc916b418d573deff9",
      "e9f6227be31945afac150d187957647e",
      "82ed7a9ba8df4adaaf1d231075efb691",
      "7300805850d246f3835e49a289b02671",
      "891da1220f74415288167b0f35040b52",
      "85661d8637c647889b3dafaae662c8c1",
      "82278ca1ab3e4106ab564b41ed0b927b",
      "193b019db0f04dbbac49d3b30f82656e",
      "93e964b3e62b41749b61a73323d71bef",
      "4bb3f9448e4f4faba78182c611bce084",
      "0e3c916040074de588a41860861a9516",
      "1d905e050d3a42149fab7130b62c7d1d",
      "2f27db2330d7432b8499a888e5821a36",
      "e0e314c4a33b4e18bc5c2e97afe8c17f",
      "a56b84f1b5034c8c81d515246fd87887",
      "4d12a628725640c7ab9e1ba39c2ee6b0",
      "0137fa7d552b4a00bd816a0f54ebcee3",
      "c255450a404e439faa7f7ba1b9f9b653",
      "aa091f0d4006467ba27ad0ca51550699",
      "fcb683aa15fa426183a93ba7c10b7c74",
      "6ec43eaa004c4332abee14f0a8b80af0",
      "cc67d9ac9d994d1382105b0c28aab58f",
      "21259e62de9c4d20a8d201e0932326b1",
      "ec4dcd069102401a8986f7a4f2d5ec1f",
      "a84e04c3028d4aeaaa6538bc42d451a6",
      "9a61a654eb464427ac2a6b6a39c313cf",
      "cddf6d30c1da48b3a5a2f3d9fbf2f17e",
      "95759f5980b24a349c6e5977f697f9d3",
      "7e73fe17010c474c97bc87da3b074857",
      "8281d037241b428eaa03670abb52afdb",
      "9dd78a2d839443219a70975dbeeff3a5",
      "29e3fcbe8f734b0e88ba3b42cf424c8a",
      "5b7e4d0f3f4f4a199e9315e7c16da8a0",
      "d1ee514ad0c0466fb548756d8aefc16d"
     ]
    },
    "executionInfo": {
     "elapsed": 55700,
     "status": "ok",
     "timestamp": 1730569818501,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "-24SJijT-zth",
    "outputId": "23b9a2b1-b7f3-437c-cecf-a988402b54f3"
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f891b1b37df947eeae060bb5daefdea9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "642afa2917004372804f7069dfde13b6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e9f6227be31945afac150d187957647e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2f27db2330d7432b8499a888e5821a36",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ec4dcd069102401a8986f7a4f2d5ec1f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/99545 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Initialize the tokenizer for multilingual NER using XLM-RoBERTa\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"xlm-roberta-base\")\n",
    "\n",
    "# Function to tokenize input and align labels with tokenized words\n",
    "def tokenize_and_align_labels(example):\n",
    "    # Tokenize the sentence while preserving word boundaries for correct NER tag alignment\n",
    "    tokenized_inputs = tokenizer(\n",
    "        example[\"tokens\"],            # List of words (tokens) in the sentence\n",
    "        truncation=True,               # Truncate sentences longer than max_length\n",
    "        is_split_into_words=True,      # Specify that input is a list of words\n",
    "        padding=\"max_length\",          # Pad to maximum sequence length\n",
    "        max_length=128,                # Set the maximum sequence length to 128 tokens\n",
    "    )\n",
    "\n",
    "    labels = []                        # List to store aligned NER labels\n",
    "    word_ids = tokenized_inputs.word_ids()  # Get word IDs for each token\n",
    "    previous_word_idx = None           # Initialize previous word index for tracking\n",
    "\n",
    "    # Loop through word indices to align NER tags with subword tokens\n",
    "    for word_idx in word_ids:\n",
    "        if word_idx is None:\n",
    "            labels.append(-100)        # Set padding token labels to -100 (ignored in loss)\n",
    "        elif word_idx != previous_word_idx:\n",
    "            # Assign the label from example's NER tags if word index matches\n",
    "            labels.append(example[\"ner_tags\"][word_idx] if word_idx < len(example[\"ner_tags\"]) else -100)\n",
    "        else:\n",
    "            labels.append(-100)        # Label subword tokens with -100 to avoid redundant labels\n",
    "        previous_word_idx = word_idx   # Update previous word index\n",
    "\n",
    "    tokenized_inputs[\"labels\"] = labels  # Add labels to tokenized inputs\n",
    "    return tokenized_inputs\n",
    "\n",
    "# Apply tokenization and label alignment function to the dataset\n",
    "tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4,
     "status": "ok",
     "timestamp": 1730569818502,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "DA7mW2it-zoo",
    "outputId": "d4830123-d1c4-4056-8fcf-01d50ed580e9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatasetDict({\n",
      "    train: Dataset({\n",
      "        features: ['index', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],\n",
      "        num_rows: 89590\n",
      "    })\n",
      "    test: Dataset({\n",
      "        features: ['index', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],\n",
      "        num_rows: 9955\n",
      "    })\n",
      "})\n"
     ]
    }
   ],
   "source": [
    "# Create a 90-10 split of the dataset for training and validation\n",
    "tokenized_datasets = tokenized_datasets[\"train\"].train_test_split(test_size=0.1)\n",
    "print(tokenized_datasets)  # Output structure of split datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-lVHfKEE-zmm"
   },
   "outputs": [],
   "source": [
    "# Define a list of entity labels for NER tagging with B- (beginning) and I- (inside) markers\n",
    "label_list = [\n",
    "    \"O\",                  # Outside of a named entity\n",
    "    \"B-PERSON\", \"I-PERSON\",         # Person name (e.g., \"John\" in \"John Doe\")\n",
    "    \"B-LOCATION\", \"I-LOCATION\",     # Geographical location (e.g., \"Paris\")\n",
    "    \"B-ORGANISATION\", \"I-ORGANISATION\", # Organization name (e.g., \"UNICEF\")\n",
    "    \"B-DATE\", \"I-DATE\",             # Date entity (e.g., \"2024-11-05\")\n",
    "    \"B-TIME\", \"I-TIME\",             # Time (e.g., \"12:00 PM\")\n",
    "    \"B-MONEY\", \"I-MONEY\",           # Monetary values (e.g., \"$20\")\n",
    "    \"B-PERCENTAGE\", \"I-PERCENTAGE\", # Percentage values (e.g., \"20%\")\n",
    "    \"B-FACILITY\", \"I-FACILITY\",     # Physical facilities (e.g., \"Airport\")\n",
    "    \"B-PRODUCT\", \"I-PRODUCT\",       # Product names (e.g., \"iPhone\")\n",
    "    \"B-EVENT\", \"I-EVENT\",           # Named events (e.g., \"Olympics\")\n",
    "    \"B-ART\", \"I-ART\",               # Works of art (e.g., \"Mona Lisa\")\n",
    "    \"B-LAW\", \"I-LAW\",               # Laws and legal documents (e.g., \"Article 50\")\n",
    "    \"B-LANGUAGE\", \"I-LANGUAGE\",     # Languages (e.g., \"Azerbaijani\")\n",
    "    \"B-GPE\", \"I-GPE\",               # Geopolitical entities (e.g., \"Europe\")\n",
    "    \"B-NORP\", \"I-NORP\",             # Nationalities, religious groups, political groups\n",
    "    \"B-ORDINAL\", \"I-ORDINAL\",       # Ordinal indicators (e.g., \"first\", \"second\")\n",
    "    \"B-CARDINAL\", \"I-CARDINAL\",     # Cardinal numbers (e.g., \"three\")\n",
    "    \"B-DISEASE\", \"I-DISEASE\",       # Diseases (e.g., \"COVID-19\")\n",
    "    \"B-CONTACT\", \"I-CONTACT\",       # Contact info (e.g., email or phone number)\n",
    "    \"B-ADAGE\", \"I-ADAGE\",           # Common sayings or adages\n",
    "    \"B-QUANTITY\", \"I-QUANTITY\",     # Quantities (e.g., \"5 km\")\n",
    "    \"B-MISCELLANEOUS\", \"I-MISCELLANEOUS\", # Miscellaneous entities not fitting other categories\n",
    "    \"B-POSITION\", \"I-POSITION\",     # Job titles or positions (e.g., \"CEO\")\n",
    "    \"B-PROJECT\", \"I-PROJECT\"        # Project names (e.g., \"Project Apollo\")\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 105,
     "referenced_widgets": [
      "61375655521643eba759627e20a995e2",
      "6838baef09dc472392da22f0df45da05",
      "d2bff630e99949acbf1f96ec82891b0b",
      "130a9da286934112b0621a1bda3a91db",
      "5de921168c404ce78dcb72b68b9ef61f",
      "fdc06ee8fd15441491cf4233afacc864",
      "ae18a99e3f9941b78220e5207f3b6f7c",
      "9e5b9c0b3d7a41eaa39a237ce23a262a",
      "64e949b60f8d45918329760e07f0c94c",
      "2a3b340c95c9443ca1396686173b534a",
      "995621d110a34982960177b9e3d6e5af"
     ]
    },
    "executionInfo": {
     "elapsed": 6243,
     "status": "ok",
     "timestamp": 1730569844595,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "jUfWCaen-zjr",
    "outputId": "5c2a4470-3c9b-4f80-d841-24bff349c52f"
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "61375655521643eba759627e20a995e2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "# Initialize a data collator to handle padding and formatting for token classification\n",
    "data_collator = DataCollatorForTokenClassification(tokenizer)\n",
    "\n",
    "# Load a pre-trained model for token classification, adapted for NER tasks\n",
    "model = AutoModelForTokenClassification.from_pretrained(\n",
    "    \"xlm-roberta-large\",               # Base model (multilingual XLM-RoBERTa) for NER\n",
    "    num_labels=len(label_list)        # Set the number of output labels to match NER categories\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9b7EajE_-zhS"
   },
   "outputs": [],
   "source": [
    "# Define a function to compute evaluation metrics for the model's predictions\n",
    "def compute_metrics(p):\n",
    "    predictions, labels = p  # Unpack predictions and true labels from the input\n",
    "\n",
    "    # Convert logits to predicted label indices by taking the argmax along the last axis\n",
    "    predictions = np.argmax(predictions, axis=2)\n",
    "\n",
    "    # Filter out special padding labels (-100) and convert indices to label names\n",
    "    true_labels = [[label_list[l] for l in label if l != -100] for label in labels]\n",
    "    true_predictions = [\n",
    "        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n",
    "        for prediction, label in zip(predictions, labels)\n",
    "    ]\n",
    "\n",
    "    # Print a detailed classification report for each label category\n",
    "    print(classification_report(true_labels, true_predictions))\n",
    "\n",
    "    # Calculate and return key evaluation metrics\n",
    "    return {\n",
    "        # Precision measures the accuracy of predicted positive instances\n",
    "        # Important in NER to ensure entity predictions are correct and reduce false positives.\n",
    "        \"precision\": precision_score(true_labels, true_predictions),\n",
    "\n",
    "        # Recall measures the model's ability to capture all relevant entities\n",
    "        # Essential in NER to ensure the model captures all entities, reducing false negatives.\n",
    "        \"recall\": recall_score(true_labels, true_predictions),\n",
    "\n",
    "        # F1-score is the harmonic mean of precision and recall, balancing both metrics\n",
    "        # Useful in NER for providing an overall performance measure, especially when precision and recall are both important.\n",
    "        \"f1\": f1_score(true_labels, true_predictions),\n",
    "    }\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PmJTMpp6-zew"
   },
   "outputs": [],
   "source": [
    "# Set up training arguments for model training, defining essential training configurations\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=\"./results\",               # Directory to save model checkpoints and final outputs\n",
    "    evaluation_strategy=\"epoch\",          # Evaluate model on the validation set at the end of each epoch\n",
    "    save_strategy=\"epoch\",                # Save model checkpoints at the end of each epoch\n",
    "    learning_rate=2e-5,                   # Set a low learning rate to ensure stable training for fine-tuning\n",
    "    per_device_train_batch_size=128,       # Number of examples per batch during training, balancing speed and memory\n",
    "    per_device_eval_batch_size=128,        # Number of examples per batch during evaluation\n",
    "    num_train_epochs=12,                   # Number of full training passes over the dataset\n",
    "    weight_decay=0.005,                    # Regularization term to prevent overfitting by penalizing large weights\n",
    "    fp16=True,                            # Use 16-bit floating point for faster and memory-efficient training\n",
    "    logging_dir='./logs',                 # Directory to store training logs\n",
    "    save_total_limit=2,                   # Keep only the 2 latest model checkpoints to save storage space\n",
    "    load_best_model_at_end=True,          # Load the best model based on metrics at the end of training\n",
    "    metric_for_best_model=\"f1\",           # Use F1-score to determine the best model checkpoint\n",
    "    report_to=\"none\"                      # Disable reporting to external services (useful in local runs)\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WqoF7QJy-zb2"
   },
   "outputs": [],
   "source": [
    "# Initialize the Trainer class to manage the training loop with all necessary components\n",
    "trainer = Trainer(\n",
    "    model=model,                         # The pre-trained model to be fine-tuned\n",
    "    args=training_args,                  # Training configuration parameters defined in TrainingArguments\n",
    "    train_dataset=tokenized_datasets[\"train\"],  # Tokenized training dataset\n",
    "    eval_dataset=tokenized_datasets[\"test\"],    # Tokenized validation dataset\n",
    "    tokenizer=tokenizer,                 # Tokenizer used for processing input text\n",
    "    data_collator=data_collator,         # Data collator for padding and batching during training\n",
    "    compute_metrics=compute_metrics,     # Function to calculate evaluation metrics like precision, recall, F1\n",
    "    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)] # Stop training early if validation metrics don't improve for 2 epochs\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "executionInfo": {
     "elapsed": 1179788,
     "status": "ok",
     "timestamp": 1730571069005,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "QveYYwvA-zUR",
    "outputId": "a96f9658-96c1-479f-fa66-385d2e527103"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='9800' max='11200' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [ 9800/11200 19:26 < 02:46, 8.40 it/s, Epoch 7/8]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.323100</td>\n",
       "      <td>0.275503</td>\n",
       "      <td>0.775799</td>\n",
       "      <td>0.694886</td>\n",
       "      <td>0.733117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.272500</td>\n",
       "      <td>0.262481</td>\n",
       "      <td>0.739266</td>\n",
       "      <td>0.739900</td>\n",
       "      <td>0.739583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.248600</td>\n",
       "      <td>0.252498</td>\n",
       "      <td>0.751478</td>\n",
       "      <td>0.741152</td>\n",
       "      <td>0.746280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.236800</td>\n",
       "      <td>0.249968</td>\n",
       "      <td>0.754882</td>\n",
       "      <td>0.741449</td>\n",
       "      <td>0.748105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.223800</td>\n",
       "      <td>0.252187</td>\n",
       "      <td>0.764390</td>\n",
       "      <td>0.740460</td>\n",
       "      <td>0.752235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.218600</td>\n",
       "      <td>0.249887</td>\n",
       "      <td>0.756352</td>\n",
       "      <td>0.741646</td>\n",
       "      <td>0.748927</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.209700</td>\n",
       "      <td>0.250748</td>\n",
       "      <td>0.760696</td>\n",
       "      <td>0.739438</td>\n",
       "      <td>0.749916</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.63      0.15      0.25      1857\n",
      "        DATE       0.53      0.39      0.45       880\n",
      "       EVENT       1.00      0.10      0.19        96\n",
      "    FACILITY       0.71      0.64      0.67      1170\n",
      "         LAW       0.56      0.59      0.57      1122\n",
      "    LOCATION       0.80      0.75      0.77      9132\n",
      "       MONEY       0.63      0.48      0.54       540\n",
      "ORGANISATION       0.67      0.58      0.62       544\n",
      "  PERCENTAGE       0.78      0.80      0.79      3591\n",
      "      PERSON       0.86      0.82      0.84      7037\n",
      "     PRODUCT       0.82      0.84      0.83      2808\n",
      "        TIME       0.59      0.35      0.44      1569\n",
      "\n",
      "   micro avg       0.78      0.69      0.73     30346\n",
      "   macro avg       0.71      0.54      0.58     30346\n",
      "weighted avg       0.76      0.69      0.72     30346\n",
      "\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.62      0.17      0.26      1857\n",
      "        DATE       0.46      0.47      0.46       880\n",
      "       EVENT       0.70      0.34      0.46        96\n",
      "    FACILITY       0.66      0.71      0.69      1170\n",
      "         LAW       0.55      0.63      0.58      1122\n",
      "    LOCATION       0.75      0.82      0.78      9132\n",
      "       MONEY       0.58      0.56      0.57       540\n",
      "ORGANISATION       0.66      0.66      0.66       544\n",
      "  PERCENTAGE       0.76      0.84      0.80      3591\n",
      "      PERSON       0.86      0.82      0.84      7037\n",
      "     PRODUCT       0.82      0.85      0.83      2808\n",
      "        TIME       0.51      0.54      0.52      1569\n",
      "\n",
      "   micro avg       0.74      0.74      0.74     30346\n",
      "   macro avg       0.66      0.62      0.62     30346\n",
      "weighted avg       0.74      0.74      0.73     30346\n",
      "\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.45      0.22      0.29      1857\n",
      "        DATE       0.50      0.47      0.49       880\n",
      "       EVENT       0.71      0.35      0.47        96\n",
      "    FACILITY       0.70      0.68      0.69      1170\n",
      "         LAW       0.60      0.61      0.60      1122\n",
      "    LOCATION       0.77      0.82      0.79      9132\n",
      "       MONEY       0.56      0.58      0.57       540\n",
      "ORGANISATION       0.64      0.67      0.65       544\n",
      "  PERCENTAGE       0.79      0.83      0.81      3591\n",
      "      PERSON       0.87      0.83      0.85      7037\n",
      "     PRODUCT       0.80      0.87      0.83      2808\n",
      "        TIME       0.53      0.52      0.53      1569\n",
      "\n",
      "   micro avg       0.75      0.74      0.75     30346\n",
      "   macro avg       0.66      0.62      0.63     30346\n",
      "weighted avg       0.74      0.74      0.74     30346\n",
      "\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.55      0.20      0.29      1857\n",
      "        DATE       0.48      0.49      0.49       880\n",
      "       EVENT       0.68      0.35      0.47        96\n",
      "    FACILITY       0.71      0.68      0.69      1170\n",
      "         LAW       0.61      0.57      0.59      1122\n",
      "    LOCATION       0.77      0.82      0.79      9132\n",
      "       MONEY       0.59      0.59      0.59       540\n",
      "ORGANISATION       0.69      0.68      0.69       544\n",
      "  PERCENTAGE       0.76      0.84      0.80      3591\n",
      "      PERSON       0.88      0.82      0.85      7037\n",
      "     PRODUCT       0.81      0.86      0.84      2808\n",
      "        TIME       0.52      0.56      0.54      1569\n",
      "\n",
      "   micro avg       0.75      0.74      0.75     30346\n",
      "   macro avg       0.67      0.62      0.63     30346\n",
      "weighted avg       0.75      0.74      0.74     30346\n",
      "\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.54      0.20      0.29      1857\n",
      "        DATE       0.52      0.47      0.50       880\n",
      "       EVENT       0.69      0.35      0.47        96\n",
      "    FACILITY       0.69      0.69      0.69      1170\n",
      "         LAW       0.60      0.61      0.60      1122\n",
      "    LOCATION       0.77      0.82      0.80      9132\n",
      "       MONEY       0.61      0.57      0.59       540\n",
      "ORGANISATION       0.69      0.68      0.69       544\n",
      "  PERCENTAGE       0.79      0.82      0.81      3591\n",
      "      PERSON       0.87      0.83      0.85      7037\n",
      "     PRODUCT       0.83      0.85      0.84      2808\n",
      "        TIME       0.55      0.51      0.53      1569\n",
      "\n",
      "   micro avg       0.76      0.74      0.75     30346\n",
      "   macro avg       0.68      0.62      0.64     30346\n",
      "weighted avg       0.75      0.74      0.74     30346\n",
      "\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.43      0.23      0.30      1857\n",
      "        DATE       0.51      0.47      0.49       880\n",
      "       EVENT       0.69      0.35      0.47        96\n",
      "    FACILITY       0.72      0.67      0.69      1170\n",
      "         LAW       0.62      0.59      0.60      1122\n",
      "    LOCATION       0.78      0.81      0.79      9132\n",
      "       MONEY       0.58      0.59      0.59       540\n",
      "ORGANISATION       0.69      0.70      0.69       544\n",
      "  PERCENTAGE       0.77      0.84      0.80      3591\n",
      "      PERSON       0.87      0.83      0.85      7037\n",
      "     PRODUCT       0.80      0.87      0.83      2808\n",
      "        TIME       0.54      0.55      0.54      1569\n",
      "\n",
      "   micro avg       0.76      0.74      0.75     30346\n",
      "   macro avg       0.67      0.62      0.64     30346\n",
      "weighted avg       0.75      0.74      0.74     30346\n",
      "\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.44      0.23      0.30      1857\n",
      "        DATE       0.50      0.49      0.50       880\n",
      "       EVENT       0.65      0.36      0.47        96\n",
      "    FACILITY       0.70      0.70      0.70      1170\n",
      "         LAW       0.60      0.61      0.60      1122\n",
      "    LOCATION       0.79      0.80      0.80      9132\n",
      "       MONEY       0.56      0.60      0.58       540\n",
      "ORGANISATION       0.68      0.70      0.69       544\n",
      "  PERCENTAGE       0.78      0.82      0.80      3591\n",
      "      PERSON       0.87      0.83      0.85      7037\n",
      "     PRODUCT       0.82      0.86      0.84      2808\n",
      "        TIME       0.55      0.54      0.54      1569\n",
      "\n",
      "   micro avg       0.76      0.74      0.75     30346\n",
      "   macro avg       0.66      0.63      0.64     30346\n",
      "weighted avg       0.75      0.74      0.74     30346\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='156' max='156' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [156/156 00:06]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "         ART       0.54      0.20      0.29      1857\n",
      "        DATE       0.52      0.47      0.50       880\n",
      "       EVENT       0.69      0.35      0.47        96\n",
      "    FACILITY       0.69      0.69      0.69      1170\n",
      "         LAW       0.60      0.61      0.60      1122\n",
      "    LOCATION       0.77      0.82      0.80      9132\n",
      "       MONEY       0.61      0.57      0.59       540\n",
      "ORGANISATION       0.69      0.68      0.69       544\n",
      "  PERCENTAGE       0.79      0.82      0.81      3591\n",
      "      PERSON       0.87      0.83      0.85      7037\n",
      "     PRODUCT       0.83      0.85      0.84      2808\n",
      "        TIME       0.55      0.51      0.53      1569\n",
      "\n",
      "   micro avg       0.76      0.74      0.75     30346\n",
      "   macro avg       0.68      0.62      0.64     30346\n",
      "weighted avg       0.75      0.74      0.74     30346\n",
      "\n",
      "{'eval_loss': 0.2521866261959076, 'eval_precision': 0.7643897128861069, 'eval_recall': 0.7404600276807487, 'eval_f1': 0.752234608817917, 'eval_runtime': 11.2348, 'eval_samples_per_second': 886.088, 'eval_steps_per_second': 13.885, 'epoch': 7.0}\n"
     ]
    }
   ],
   "source": [
    "# Begin the training process and capture the training metrics\n",
    "training_metrics = trainer.train()\n",
    "\n",
    "# Evaluate the model on the validation set after training\n",
    "eval_results = trainer.evaluate()\n",
    "\n",
    "# Print evaluation results, including precision, recall, and F1-score\n",
    "print(eval_results)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 2489,
     "status": "ok",
     "timestamp": 1730571071491,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "7yEFe2_n-zPG",
    "outputId": "622ba174-8cc7-4b8f-f5e5-a959a762360b"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('./XLM-RoBERTa/tokenizer_config.json',\n",
       " './XLM-RoBERTa/special_tokens_map.json',\n",
       " './XLM-RoBERTa/sentencepiece.bpe.model',\n",
       " './XLM-RoBERTa/added_tokens.json',\n",
       " './XLM-RoBERTa/tokenizer.json')"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Define the directory where the trained model and tokenizer will be saved\n",
    "save_directory = \"./XLM-RoBERTa\"\n",
    "\n",
    "# Save the trained model to the specified directory\n",
    "model.save_pretrained(save_directory)\n",
    "\n",
    "# Save the tokenizer to the same directory for compatibility with the model\n",
    "tokenizer.save_pretrained(save_directory)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zkECg3v9-zNQ"
   },
   "outputs": [],
   "source": [
    "from transformers import pipeline\n",
    "\n",
    "# Load tokenizer and model\n",
    "tokenizer = AutoTokenizer.from_pretrained(save_directory)\n",
    "model = AutoModelForTokenClassification.from_pretrained(save_directory)\n",
    "\n",
    "# Initialize the NER pipeline\n",
    "device = 0 if torch.cuda.is_available() else -1\n",
    "nlp_ner = pipeline(\"ner\", model=model, tokenizer=tokenizer, aggregation_strategy=\"simple\", device=device)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SOFqXU-M_bxO"
   },
   "outputs": [],
   "source": [
    "label_mapping = {f\"LABEL_{i}\": label for i, label in enumerate(label_list) if label != \"O\"}\n",
    "\n",
    "def evaluate_model(test_texts, true_labels):\n",
    "    predictions = []\n",
    "    for i, text in enumerate(test_texts):\n",
    "        pred_entities = nlp_ner(text)\n",
    "        pred_labels = [label_mapping.get(entity[\"entity_group\"], \"O\") for entity in pred_entities if entity[\"entity_group\"] in label_mapping]\n",
    "        if len(pred_labels) != len(true_labels[i]):\n",
    "            print(f\"Warning: Inconsistent number of entities in sample {i+1}. Adjusting predicted entities.\")\n",
    "            pred_labels = pred_labels[:len(true_labels[i])]\n",
    "        predictions.append(pred_labels)\n",
    "    if all(len(true) == len(pred) for true, pred in zip(true_labels, predictions)):\n",
    "        precision = precision_score(true_labels, predictions)\n",
    "        recall = recall_score(true_labels, predictions)\n",
    "        f1 = f1_score(true_labels, predictions)\n",
    "        print(\"Precision:\", precision)\n",
    "        print(\"Recall:\", recall)\n",
    "        print(\"F1-Score:\", f1)\n",
    "        print(classification_report(true_labels, predictions))\n",
    "    else:\n",
    "        print(\"Error: Could not align all samples correctly for evaluation.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 5,
     "status": "ok",
     "timestamp": 1730571073034,
     "user": {
      "displayName": "Ismat Samadov",
      "userId": "13714662825869203427"
     },
     "user_tz": -240
    },
    "id": "WRCB-_66_buE",
    "outputId": "c9a05d33-6fa4-4bae-93a0-04bf325683f8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Precision: 0.5\n",
      "Recall: 0.5\n",
      "F1-Score: 0.5\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "    LOCATION       0.00      0.00      0.00         0\n",
      "ORGANISATION       0.00      0.00      0.00         1\n",
      "      PERSON       1.00      1.00      1.00         1\n",
      "\n",
      "   micro avg       0.50      0.50      0.50         2\n",
      "   macro avg       0.33      0.33      0.33         2\n",
      "weighted avg       0.50      0.50      0.50         2\n",
      "\n"
     ]
    }
   ],
   "source": [
    "test_texts = [\"Shahla Khuduyeva və Pasha Sığorta şirkəti haqqında məlumat.\"]\n",
    "true_labels = [[\"B-PERSON\", \"B-ORGANISATION\"]]\n",
    "evaluate_model(test_texts, true_labels)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "x53zS3Vv_brU"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5Uoebirj_boo"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RKounG2l_bl5"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "authorship_tag": "ABX9TyNEIe3MLYmbuVlh01xxHiZi",
   "gpuType": "A100",
   "machine_shape": "hm",
   "mount_file_id": "1EYYZa7dya2RjTZXHSJ4pzIOgzqR8lmSk",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "0137fa7d552b4a00bd816a0f54ebcee3": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "014ac61faabf4dacbf8329d9f22a7d74": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "048f42e41b8e45a0847370c4ecec5c75": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "0835d840cd7a4444b477187796df6ab1": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_c324af888b614caa9cead361c9decafe",
      "max": 99545,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_542aeebec2144a79b36baa0fdfb4bc32",
      "value": 99545
     }
    },
    "0b430c3c4ff641a38e4a6603b30dfdf0": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_975acf8253a3411293d961167054e7f0",
      "placeholder": "​",
      "style": "IPY_MODEL_1c184b2b6c164ac4965182b4dd760faa",
      "value": " 99545/99545 [00:17&lt;00:00, 7438.72 examples/s]"
     }
    },
    "0cf2e757689b44ff8dd5b099d4deec99": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "0e3c916040074de588a41860861a9516": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "130a9da286934112b0621a1bda3a91db": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_2a3b340c95c9443ca1396686173b534a",
      "placeholder": "​",
      "style": "IPY_MODEL_995621d110a34982960177b9e3d6e5af",
      "value": " 1.12G/1.12G [00:05&lt;00:00, 206MB/s]"
     }
    },
    "193b019db0f04dbbac49d3b30f82656e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "1a4a5ae499224227b594f7e44ec05626": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "1b3d4b4f685f47f5b4cd1a83e76330c0": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_f553dfdc8cf441a6990a8dc2126e55e5",
      "max": 2872,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_dcfd93ce24474f639418c7f2b56ce101",
      "value": 2872
     }
    },
    "1c184b2b6c164ac4965182b4dd760faa": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "1d905e050d3a42149fab7130b62c7d1d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "1e8546990a404bffa6cacc8ebcc455be": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "21259e62de9c4d20a8d201e0932326b1": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "228bb22c294f4fd1a08910c080586e65": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_c8b59cd42f04425e8de6f832acff43a4",
      "placeholder": "​",
      "style": "IPY_MODEL_5546cbf48870406ca3e93e6ebed6487b",
      "value": "README.md: 100%"
     }
    },
    "29e3fcbe8f734b0e88ba3b42cf424c8a": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "2a3b340c95c9443ca1396686173b534a": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "2e185b6e38e348919accedcf54fe32ec": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "2f27db2330d7432b8499a888e5821a36": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_e0e314c4a33b4e18bc5c2e97afe8c17f",
       "IPY_MODEL_a56b84f1b5034c8c81d515246fd87887",
       "IPY_MODEL_4d12a628725640c7ab9e1ba39c2ee6b0"
      ],
      "layout": "IPY_MODEL_0137fa7d552b4a00bd816a0f54ebcee3"
     }
    },
    "30f20a8152e3450cb5c0c5300f2a677b": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "371faab019ee49d78fa2547a65fc68b9": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "3d96ee9792cf4ba5b57b7858678985bb": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_7a14fd2365fa40ac92e8851019639a46",
      "placeholder": "​",
      "style": "IPY_MODEL_804273498d974d388585d2f8769db2cb",
      "value": " 2.87k/2.87k [00:00&lt;00:00, 227kB/s]"
     }
    },
    "470d93420bfc4d23bca07c308f37c316": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_0cf2e757689b44ff8dd5b099d4deec99",
      "placeholder": "​",
      "style": "IPY_MODEL_e378c4f4d73f42469868b21c0b5168db",
      "value": "tokenizer_config.json: 100%"
     }
    },
    "4bb3f9448e4f4faba78182c611bce084": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "4d12a628725640c7ab9e1ba39c2ee6b0": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_cc67d9ac9d994d1382105b0c28aab58f",
      "placeholder": "​",
      "style": "IPY_MODEL_21259e62de9c4d20a8d201e0932326b1",
      "value": " 9.10M/9.10M [00:00&lt;00:00, 20.6MB/s]"
     }
    },
    "4d770190eebf44e59e4c85598c48bb5e": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "542aeebec2144a79b36baa0fdfb4bc32": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "5546cbf48870406ca3e93e6ebed6487b": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "5b7e4d0f3f4f4a199e9315e7c16da8a0": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "5de921168c404ce78dcb72b68b9ef61f": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "61375655521643eba759627e20a995e2": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_6838baef09dc472392da22f0df45da05",
       "IPY_MODEL_d2bff630e99949acbf1f96ec82891b0b",
       "IPY_MODEL_130a9da286934112b0621a1bda3a91db"
      ],
      "layout": "IPY_MODEL_5de921168c404ce78dcb72b68b9ef61f"
     }
    },
    "642afa2917004372804f7069dfde13b6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_a6181461c40040548b4afe1cc25d3eb9",
       "IPY_MODEL_fd779f16d24a4cf481b32ed6d7d386b4",
       "IPY_MODEL_ce2f73c695c84899864bd8b90ce61acf"
      ],
      "layout": "IPY_MODEL_91d6176ee34e43b6bdc589651a84bb39"
     }
    },
    "64e949b60f8d45918329760e07f0c94c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "6786dbb578764ecbb07549f99284e872": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "6838baef09dc472392da22f0df45da05": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_fdc06ee8fd15441491cf4233afacc864",
      "placeholder": "​",
      "style": "IPY_MODEL_ae18a99e3f9941b78220e5207f3b6f7c",
      "value": "model.safetensors: 100%"
     }
    },
    "6d2ed7f8c8e54d32b94a410de93bebb4": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_f1247d8b523e4d03bf802ea761090255",
      "placeholder": "​",
      "style": "IPY_MODEL_dc951346b3d742d7861929d4d4bd69db",
      "value": "Generating train split: 100%"
     }
    },
    "6dd8528c7a8c4cf59a8be94ea2eec2ac": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "6ec43eaa004c4332abee14f0a8b80af0": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "6f3022c618f548e0b21d8b031fabdfc8": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "7300805850d246f3835e49a289b02671": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_93e964b3e62b41749b61a73323d71bef",
      "max": 5069051,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_4bb3f9448e4f4faba78182c611bce084",
      "value": 5069051
     }
    },
    "747e1fd9be3240169325520985f51ab0": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_8d8255757c434c74b955ee6ee6118282",
      "placeholder": "​",
      "style": "IPY_MODEL_de1715ae4aa04626a4a2243fc7274908",
      "value": " 25.0/25.0 [00:00&lt;00:00, 2.23kB/s]"
     }
    },
    "77528b7a72024e3ba2c03f495188b091": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_371faab019ee49d78fa2547a65fc68b9",
      "placeholder": "​",
      "style": "IPY_MODEL_30f20a8152e3450cb5c0c5300f2a677b",
      "value": " 99545/99545 [00:00&lt;00:00, 594401.54 examples/s]"
     }
    },
    "784e878215004b05a056ca81b603e225": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "7a14fd2365fa40ac92e8851019639a46": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "7de65e247932425882a27257bf77b913": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_228bb22c294f4fd1a08910c080586e65",
       "IPY_MODEL_1b3d4b4f685f47f5b4cd1a83e76330c0",
       "IPY_MODEL_3d96ee9792cf4ba5b57b7858678985bb"
      ],
      "layout": "IPY_MODEL_84a7d8ad32da45c792d570a918f136d6"
     }
    },
    "7e73fe17010c474c97bc87da3b074857": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "804273498d974d388585d2f8769db2cb": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "82278ca1ab3e4106ab564b41ed0b927b": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "8281d037241b428eaa03670abb52afdb": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "82ed7a9ba8df4adaaf1d231075efb691": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_82278ca1ab3e4106ab564b41ed0b927b",
      "placeholder": "​",
      "style": "IPY_MODEL_193b019db0f04dbbac49d3b30f82656e",
      "value": "sentencepiece.bpe.model: 100%"
     }
    },
    "844335f761134693ae7aff60803dc62a": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_9a688924cb2b4a9c984eddee5d6e57d7",
      "placeholder": "​",
      "style": "IPY_MODEL_a2681b1b407e45dbb2277685ad082fd3",
      "value": "Map: 100%"
     }
    },
    "84a7d8ad32da45c792d570a918f136d6": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "85661d8637c647889b3dafaae662c8c1": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "891da1220f74415288167b0f35040b52": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_0e3c916040074de588a41860861a9516",
      "placeholder": "​",
      "style": "IPY_MODEL_1d905e050d3a42149fab7130b62c7d1d",
      "value": " 5.07M/5.07M [00:00&lt;00:00, 7.74MB/s]"
     }
    },
    "8d8255757c434c74b955ee6ee6118282": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "8e2fd00ac8414b2c92f17d565abacb91": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_048f42e41b8e45a0847370c4ecec5c75",
      "max": 25,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_c9dee5569d5340a2a7cbf59eb6a3cd1f",
      "value": 25
     }
    },
    "91d6176ee34e43b6bdc589651a84bb39": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "93e964b3e62b41749b61a73323d71bef": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "95759f5980b24a349c6e5977f697f9d3": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "975acf8253a3411293d961167054e7f0": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "995621d110a34982960177b9e3d6e5af": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "998183d1e43d4b58b6d49796dd6c65d2": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "9a61a654eb464427ac2a6b6a39c313cf": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_9dd78a2d839443219a70975dbeeff3a5",
      "max": 99545,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_29e3fcbe8f734b0e88ba3b42cf424c8a",
      "value": 99545
     }
    },
    "9a688924cb2b4a9c984eddee5d6e57d7": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "9d2dd1835a6d436abdda27377c39951d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_6dd8528c7a8c4cf59a8be94ea2eec2ac",
      "max": 13578023,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_e53b7766d65f4f31a000d433fe1718d8",
      "value": 13578023
     }
    },
    "9dd78a2d839443219a70975dbeeff3a5": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "9e5b9c0b3d7a41eaa39a237ce23a262a": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "a0d5b69380ab4cccbe9fa9555e733d29": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_a5f12e46915c4c7dbd92a8e6866a2136",
       "IPY_MODEL_9d2dd1835a6d436abdda27377c39951d",
       "IPY_MODEL_bde988b2464c49e38d9d660124e1a1ca"
      ],
      "layout": "IPY_MODEL_6786dbb578764ecbb07549f99284e872"
     }
    },
    "a2681b1b407e45dbb2277685ad082fd3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "a56b84f1b5034c8c81d515246fd87887": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_fcb683aa15fa426183a93ba7c10b7c74",
      "max": 9096718,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_6ec43eaa004c4332abee14f0a8b80af0",
      "value": 9096718
     }
    },
    "a5f12e46915c4c7dbd92a8e6866a2136": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_1a4a5ae499224227b594f7e44ec05626",
      "placeholder": "​",
      "style": "IPY_MODEL_d71e12cac7384779b7cf6deccdec6205",
      "value": "train-00000-of-00001.parquet: 100%"
     }
    },
    "a6181461c40040548b4afe1cc25d3eb9": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_784e878215004b05a056ca81b603e225",
      "placeholder": "​",
      "style": "IPY_MODEL_e5d4bdd8daa5443a9fb56198088232bb",
      "value": "config.json: 100%"
     }
    },
    "a84e04c3028d4aeaaa6538bc42d451a6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_7e73fe17010c474c97bc87da3b074857",
      "placeholder": "​",
      "style": "IPY_MODEL_8281d037241b428eaa03670abb52afdb",
      "value": "Map: 100%"
     }
    },
    "aa091f0d4006467ba27ad0ca51550699": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "ae18a99e3f9941b78220e5207f3b6f7c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "b32ad591846a4b2bbdbd5c5294dfa1e3": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "b42331f0c0ca417ea779764f7c7b91a6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_2e185b6e38e348919accedcf54fe32ec",
      "max": 99545,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_daa89fa3059144ceaf95c8d8d209c59d",
      "value": 99545
     }
    },
    "bde988b2464c49e38d9d660124e1a1ca": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_ee5a83f45127475496dbb8347c2e3b9e",
      "placeholder": "​",
      "style": "IPY_MODEL_998183d1e43d4b58b6d49796dd6c65d2",
      "value": " 13.6M/13.6M [00:00&lt;00:00, 40.1MB/s]"
     }
    },
    "c0753a29729549cc916b418d573deff9": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "c255450a404e439faa7f7ba1b9f9b653": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "c324af888b614caa9cead361c9decafe": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "c8b59cd42f04425e8de6f832acff43a4": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "c9dee5569d5340a2a7cbf59eb6a3cd1f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "cc67d9ac9d994d1382105b0c28aab58f": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "cddf6d30c1da48b3a5a2f3d9fbf2f17e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_5b7e4d0f3f4f4a199e9315e7c16da8a0",
      "placeholder": "​",
      "style": "IPY_MODEL_d1ee514ad0c0466fb548756d8aefc16d",
      "value": " 99545/99545 [00:50&lt;00:00, 1822.96 examples/s]"
     }
    },
    "ce2f73c695c84899864bd8b90ce61acf": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_014ac61faabf4dacbf8329d9f22a7d74",
      "placeholder": "​",
      "style": "IPY_MODEL_c0753a29729549cc916b418d573deff9",
      "value": " 615/615 [00:00&lt;00:00, 46.0kB/s]"
     }
    },
    "d1ee514ad0c0466fb548756d8aefc16d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "d2bff630e99949acbf1f96ec82891b0b": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_9e5b9c0b3d7a41eaa39a237ce23a262a",
      "max": 1115567652,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_64e949b60f8d45918329760e07f0c94c",
      "value": 1115567652
     }
    },
    "d71e12cac7384779b7cf6deccdec6205": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "daa89fa3059144ceaf95c8d8d209c59d": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "dc951346b3d742d7861929d4d4bd69db": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "dcfd93ce24474f639418c7f2b56ce101": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "de1715ae4aa04626a4a2243fc7274908": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "e0e314c4a33b4e18bc5c2e97afe8c17f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_c255450a404e439faa7f7ba1b9f9b653",
      "placeholder": "​",
      "style": "IPY_MODEL_aa091f0d4006467ba27ad0ca51550699",
      "value": "tokenizer.json: 100%"
     }
    },
    "e2baeb32f3bb47aeab3d6ebf3a5e8876": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_6d2ed7f8c8e54d32b94a410de93bebb4",
       "IPY_MODEL_b42331f0c0ca417ea779764f7c7b91a6",
       "IPY_MODEL_77528b7a72024e3ba2c03f495188b091"
      ],
      "layout": "IPY_MODEL_4d770190eebf44e59e4c85598c48bb5e"
     }
    },
    "e378c4f4d73f42469868b21c0b5168db": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "e53b7766d65f4f31a000d433fe1718d8": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "e5d4bdd8daa5443a9fb56198088232bb": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "e68c8857489c4b5e83c52b12d32b71e6": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "e9f6227be31945afac150d187957647e": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_82ed7a9ba8df4adaaf1d231075efb691",
       "IPY_MODEL_7300805850d246f3835e49a289b02671",
       "IPY_MODEL_891da1220f74415288167b0f35040b52"
      ],
      "layout": "IPY_MODEL_85661d8637c647889b3dafaae662c8c1"
     }
    },
    "ec4dcd069102401a8986f7a4f2d5ec1f": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_a84e04c3028d4aeaaa6538bc42d451a6",
       "IPY_MODEL_9a61a654eb464427ac2a6b6a39c313cf",
       "IPY_MODEL_cddf6d30c1da48b3a5a2f3d9fbf2f17e"
      ],
      "layout": "IPY_MODEL_95759f5980b24a349c6e5977f697f9d3"
     }
    },
    "ee5a83f45127475496dbb8347c2e3b9e": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "f1247d8b523e4d03bf802ea761090255": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "f553dfdc8cf441a6990a8dc2126e55e5": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "f891b1b37df947eeae060bb5daefdea9": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_470d93420bfc4d23bca07c308f37c316",
       "IPY_MODEL_8e2fd00ac8414b2c92f17d565abacb91",
       "IPY_MODEL_747e1fd9be3240169325520985f51ab0"
      ],
      "layout": "IPY_MODEL_6f3022c618f548e0b21d8b031fabdfc8"
     }
    },
    "fba60dcd62c04b53acba3c371d926149": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_844335f761134693ae7aff60803dc62a",
       "IPY_MODEL_0835d840cd7a4444b477187796df6ab1",
       "IPY_MODEL_0b430c3c4ff641a38e4a6603b30dfdf0"
      ],
      "layout": "IPY_MODEL_e68c8857489c4b5e83c52b12d32b71e6"
     }
    },
    "fcb683aa15fa426183a93ba7c10b7c74": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "fd779f16d24a4cf481b32ed6d7d386b4": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "success",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_1e8546990a404bffa6cacc8ebcc455be",
      "max": 615,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_b32ad591846a4b2bbdbd5c5294dfa1e3",
      "value": 615
     }
    },
    "fdc06ee8fd15441491cf4233afacc864": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}