{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":22976,"status":"ok","timestamp":1654203367409,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"},"user_tz":-120},"id":"o4-qIq-Rt179","outputId":"b18fc480-eb9f-46d3-98a6-552fddb666aa"},"outputs":[{"name":"stdout","output_type":"stream","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting datasets\n"," Downloading datasets-2.2.2-py3-none-any.whl (346 kB)\n","\u001b[K |████████████████████████████████| 346 kB 8.3 MB/s \n","\u001b[?25hCollecting transformers==4.18.0\n"," Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)\n","\u001b[K |████████████████████████████████| 4.0 MB 45.3 MB/s \n","\u001b[?25hCollecting sentencepiece\n"," Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n","\u001b[K |████████████████████████████████| 1.2 MB 43.3 MB/s \n","\u001b[?25hRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) (21.3)\n","Collecting sacremoses\n"," Downloading sacremoses-0.0.53.tar.gz (880 kB)\n","\u001b[K |████████████████████████████████| 880 kB 47.0 MB/s \n","\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) (4.64.0)\n","Collecting huggingface-hub<1.0,>=0.1.0\n"," Downloading huggingface_hub-0.7.0-py3-none-any.whl (86 kB)\n","\u001b[K |████████████████████████████████| 86 kB 5.3 MB/s \n","\u001b[?25hCollecting pyyaml>=5.1\n"," Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n","\u001b[K |████████████████████████████████| 596 kB 49.6 MB/s \n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) 
(3.7.0)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) (2019.12.20)\n","Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) (4.11.4)\n","Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) (2.23.0)\n","Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n"," Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n","\u001b[K |████████████████████████████████| 6.6 MB 36.1 MB/s \n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.18.0) (1.21.6)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.18.0) (4.2.0)\n","Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers==4.18.0) (3.0.9)\n","Collecting xxhash\n"," Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n","\u001b[K |████████████████████████████████| 212 kB 55.8 MB/s \n","\u001b[?25hRequirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n","Collecting aiohttp\n"," Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n","\u001b[K |████████████████████████████████| 1.1 MB 45.5 MB/s \n","\u001b[?25hCollecting fsspec[http]>=2021.05.0\n"," Downloading fsspec-2022.5.0-py3-none-any.whl (140 kB)\n","\u001b[K |████████████████████████████████| 140 kB 53.6 MB/s \n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n","Collecting responses<0.19\n"," Downloading responses-0.18.0-py3-none-any.whl (38 
kB)\n","Collecting dill<0.3.5\n"," Downloading dill-0.3.4-py2.py3-none-any.whl (86 kB)\n","\u001b[K |████████████████████████████████| 86 kB 5.7 MB/s \n","\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.13)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.18.0) (1.24.3)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.18.0) (2022.5.18.1)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.18.0) (2.10)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.18.0) (3.0.4)\n","Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n"," Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n","\u001b[K |████████████████████████████████| 127 kB 52.3 MB/s \n","\u001b[?25hCollecting frozenlist>=1.1.1\n"," Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n","\u001b[K |████████████████████████████████| 144 kB 50.9 MB/s \n","\u001b[?25hRequirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.12)\n","Collecting multidict<7.0,>=4.5\n"," Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)\n","\u001b[K |████████████████████████████████| 94 kB 3.2 MB/s \n","\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.4.0)\n","Collecting async-timeout<5.0,>=4.0.0a3\n"," Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n","Collecting asynctest==0.13.0\n"," Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n","Collecting 
yarl<2.0,>=1.0\n"," Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n","\u001b[K |████████████████████████████████| 271 kB 51.2 MB/s \n","\u001b[?25hCollecting aiosignal>=1.1.2\n"," Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n","Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.18.0) (3.8.0)\n","Collecting multiprocess\n"," Downloading multiprocess-0.70.12.2-py37-none-any.whl (112 kB)\n","\u001b[K |████████████████████████████████| 112 kB 48.2 MB/s \n","\u001b[?25hRequirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.1)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.18.0) (7.1.2)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.18.0) (1.1.0)\n","Building wheels for collected packages: sacremoses\n"," Building wheel for sacremoses (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=cca626dcce098f80c7609e89b9d7ae0782c9bc69e80da67f7a8ca172615ec417\n"," Stored in directory: /root/.cache/pip/wheels/87/39/dd/a83eeef36d0bf98e7a4d1933a4ad2d660295a40613079bafc9\n","Successfully built sacremoses\n","Installing collected packages: multidict, frozenlist, yarl, urllib3, asynctest, async-timeout, aiosignal, pyyaml, fsspec, dill, aiohttp, xxhash, tokenizers, sacremoses, responses, multiprocess, huggingface-hub, transformers, sentencepiece, datasets\n"," Attempting uninstall: urllib3\n"," Found existing installation: urllib3 1.24.3\n"," Uninstalling urllib3-1.24.3:\n"," Successfully uninstalled urllib3-1.24.3\n"," Attempting uninstall: pyyaml\n"," Found existing installation: PyYAML 3.13\n"," Uninstalling PyYAML-3.13:\n"," Successfully uninstalled PyYAML-3.13\n"," Attempting uninstall: dill\n"," Found existing installation: dill 0.3.5.1\n"," Uninstalling dill-0.3.5.1:\n"," Successfully uninstalled dill-0.3.5.1\n"," Attempting uninstall: multiprocess\n"," Found existing installation: multiprocess 0.70.13\n"," Uninstalling multiprocess-0.70.13:\n"," Successfully uninstalled multiprocess-0.70.13\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n","datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001b[0m\n","Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 asynctest-0.13.0 datasets-2.2.2 dill-0.3.4 frozenlist-1.3.0 fsspec-2022.5.0 huggingface-hub-0.7.0 multidict-6.0.2 multiprocess-0.70.12.2 pyyaml-6.0 responses-0.18.0 sacremoses-0.0.53 sentencepiece-0.1.96 tokenizers-0.12.1 transformers-4.18.0 urllib3-1.25.11 xxhash-3.0.0 yarl-1.7.2\n"]}],"source":["!pip install datasets transformers==4.18.0 sentencepiece"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cJ5SzANft-Lx"},"outputs":[],"source":["from datasets import *\n","from transformers import *\n","from tokenizers import *\n","import os\n","import json"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":166,"referenced_widgets":["7aa9b4e351a34a84a8aa6ad49aa5d74d","2f22301fd2984090be8adb6fe839e393","98d3d993e19d4f448cbca02235a850ac","8bec66cc71aa433ea55697d640988262","b45af68d650f445d97876e9b51d3f15a","e33fa1b90e9947e8858db2ef44969e37","f6363254e1fd40b88ec971851ad7e441","89fdcc82f49f4ab2b498bb5d46f9b73b","d34ef4b4dfb540e28f878df61f27ff26","6aa0294bb5b741f49883998b69accaba","f6721fa0034043138705c565a4b77b77","4df3ac00cfeb4441beb0c077578ce793","e549c9c30ce44951b93b1f9d4d1cfca1","9f7e7e08223343d5b78d5c2d8855640d","4aaff7ef487c4b5c915b2def2ab21759","99bbccd66c66489b96470d3e9caf1f1f","c082792cfdde4faab6bea631addceb00","2e10e57221ef46d695eb16fd25ec5e49","a77ad1702bf7439f87f7b1084d278717","241cca42438046aea2a9b4874f37c8b1","ba0b6327ac3740f79f66cb54d131f4fa","16fd5817ade84d92abeebb70952c926f","3f27b9cc5f104665a99a996c7ab3af1c","c73ea971834643fab70be84563d06f6a","653752175e3445ee8fd4651bd551b34d","34e85e0a8cf448828e27cb266626cb27","93b2d9dd8440496f8d1812993530dc05","fa06a799cfe8477a8e3a99a6dd99ca27","d4d1386f42534f8584d0c1e0428bd65b","788f92dcba3f4148bc4e88b
5c4f9b28b","cfcf5950147d45e0bc3c8689b5b76073","5837dd25ab0645939444b94ab35e5db4","d78152622ecf4f3da35756557a802251","450625b8b8cb4ea18bd6e8d0807c0830","123f86c229c24496979269c09256d1e6","cdcc3c356d91458ba4be2f1a8b41f9da","66e0498023a64a109f4e18e030937e5e","bce52428773848faba37e3a41747b4e9","6d6b854ddcbc4113b941c8ba804e2877","e4be24ca306d4a5c8d4a8a1718225590","7a3d34b2e76a4d4b8b14ac5aefb3883f","ffd1f3803c154f68b9b921cfefc00604","4801d49b04044fa79f64afb3e4d0d89c","599a2e48109c4b25840754625c05af43"]},"executionInfo":{"elapsed":286202,"status":"ok","timestamp":1654203680599,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"},"user_tz":-120},"id":"QEvDxUpYuARd","outputId":"c0615e23-7592-4fb4-da1e-f33941fbb02b"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7aa9b4e351a34a84a8aa6ad49aa5d74d","version_major":2,"version_minor":0},"text/plain":["Downloading builder script: 0%| | 0.00/1.75k [00:00\", \"\"\n","]\n","# if you want to train the tokenizer on both sets\n","# files = [\"train.txt\", \"test.txt\"]\n","# training the tokenizer on the training set\n","files = [\"train.txt\"]\n","# 30,522 vocab is BERT's default vocab size, feel free to tweak\n","vocab_size = 30_522\n","# maximum sequence length, lowering will result to faster training (when increasing batch size)\n","max_length = 512\n","# whether to truncate\n","truncate_longer_samples = False"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"-CVoZ3bC_j6K"},"outputs":[],"source":["# initialize the WordPiece tokenizer\n","tokenizer = BertWordPieceTokenizer()\n","# train the tokenizer\n","tokenizer.train(files=files, vocab_size=vocab_size, special_tokens=special_tokens)\n","# enable truncation up to the maximum 512 tokens\n","tokenizer.enable_truncation(max_length=max_length)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"vix0oz7XzI_w"},"outputs":[],"source":["model_path = 
\"pretrained-bert\"\n","# make the directory if not already there\n","if not os.path.isdir(model_path):\n"," os.mkdir(model_path)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"vmeI9Vgx06VB","outputId":"5ce209ce-dd99-45a0-ed54-f42124be7305"},"outputs":[{"data":{"text/plain":["['pretrained-bert/vocab.txt']"]},"execution_count":null,"metadata":{},"output_type":"execute_result"}],"source":["# save the tokenizer \n","tokenizer.save_model(model_path)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"d-HZAthp0SNk"},"outputs":[],"source":["# dump some of the tokenizer config to a config file,\n","# including the special tokens, whether to lowercase, and the maximum sequence length\n","with open(os.path.join(model_path, \"config.json\"), \"w\") as f:\n"," tokenizer_cfg = {\n"," \"do_lower_case\": True,\n"," \"unk_token\": \"[UNK]\",\n"," \"sep_token\": \"[SEP]\",\n"," \"pad_token\": \"[PAD]\",\n"," \"cls_token\": \"[CLS]\",\n"," \"mask_token\": \"[MASK]\",\n"," \"model_max_length\": max_length,\n"," \"max_len\": max_length,\n"," }\n"," json.dump(tokenizer_cfg, f)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"OkJ_tU4B0jNf","outputId":"a632ee1e-b82d-4967-a83b-7ed4a70333c3"},"outputs":[{"name":"stderr","output_type":"stream","text":["Didn't find file pretrained-bert/tokenizer.json. We won't load it.\n","Didn't find file pretrained-bert/added_tokens.json. We won't load it.\n","Didn't find file pretrained-bert/special_tokens_map.json. We won't load it.\n","Didn't find file pretrained-bert/tokenizer_config.json. 
We won't load it.\n","loading file pretrained-bert/vocab.txt\n","loading file None\n","loading file None\n","loading file None\n","loading file None\n","loading configuration file pretrained-bert/config.json\n","Model config BertConfig {\n"," \"_name_or_path\": \"pretrained-bert\",\n"," \"attention_probs_dropout_prob\": 0.1,\n"," \"classifier_dropout\": null,\n"," \"cls_token\": \"[CLS]\",\n"," \"do_lower_case\": true,\n"," \"hidden_act\": \"gelu\",\n"," \"hidden_dropout_prob\": 0.1,\n"," \"hidden_size\": 768,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 3072,\n"," \"layer_norm_eps\": 1e-12,\n"," \"mask_token\": \"[MASK]\",\n"," \"max_len\": 512,\n"," \"max_position_embeddings\": 512,\n"," \"model_max_length\": 512,\n"," \"model_type\": \"bert\",\n"," \"num_attention_heads\": 12,\n"," \"num_hidden_layers\": 12,\n"," \"pad_token\": \"[PAD]\",\n"," \"pad_token_id\": 0,\n"," \"position_embedding_type\": \"absolute\",\n"," \"sep_token\": \"[SEP]\",\n"," \"transformers_version\": \"4.18.0\",\n"," \"type_vocab_size\": 2,\n"," \"unk_token\": \"[UNK]\",\n"," \"use_cache\": true,\n"," \"vocab_size\": 30522\n","}\n","\n","loading configuration file pretrained-bert/config.json\n","Model config BertConfig {\n"," \"_name_or_path\": \"pretrained-bert\",\n"," \"attention_probs_dropout_prob\": 0.1,\n"," \"classifier_dropout\": null,\n"," \"cls_token\": \"[CLS]\",\n"," \"do_lower_case\": true,\n"," \"hidden_act\": \"gelu\",\n"," \"hidden_dropout_prob\": 0.1,\n"," \"hidden_size\": 768,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 3072,\n"," \"layer_norm_eps\": 1e-12,\n"," \"mask_token\": \"[MASK]\",\n"," \"max_len\": 512,\n"," \"max_position_embeddings\": 512,\n"," \"model_max_length\": 512,\n"," \"model_type\": \"bert\",\n"," \"num_attention_heads\": 12,\n"," \"num_hidden_layers\": 12,\n"," \"pad_token\": \"[PAD]\",\n"," \"pad_token_id\": 0,\n"," \"position_embedding_type\": \"absolute\",\n"," \"sep_token\": \"[SEP]\",\n"," 
\"transformers_version\": \"4.18.0\",\n"," \"type_vocab_size\": 2,\n"," \"unk_token\": \"[UNK]\",\n"," \"use_cache\": true,\n"," \"vocab_size\": 30522\n","}\n","\n"]}],"source":["# when the tokenizer is trained and configured, load it as BertTokenizerFast\n","tokenizer = BertTokenizerFast.from_pretrained(model_path)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true,"base_uri":"https://localhost:8080/","height":66,"referenced_widgets":["c3a30fb959aa47f889692b518b2c1664","bed4e885cf5d4b82a38833820b8e118f","4589cb842c7842ddb0e9bca6db71d590","3748fb75842f4392b40fbfab0b7c9caa","938c3b47fef24ad48b0ace7e7dcfcd80","f10afe04e61d4edeb33d8907a1192891","d84d85ce2d3f4dd491a44b97e653e175","54b4cf2d58ba4f87aec5070dbd1ff801","bc97183430e34db4b073305ce07d6f41","c082e56c91ce4bb4a4bb1e0b0001eaa2","6c082c2cd59f483981b4839dff47e071","62fe563ea6a74aa59833ce78423213da"]},"executionInfo":{"elapsed":1023609,"status":"ok","timestamp":1654205457093,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"},"user_tz":-120},"id":"sYw3cjdQ0pHT","outputId":"277e31b9-2391-4538-d02d-4458e23f3100"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c3a30fb959aa47f889692b518b2c1664","version_major":2,"version_minor":0},"text/plain":[" 0%| | 0/638 [00:00= max_length:\n"," total_length = (total_length // max_length) * max_length\n"," # Split by chunks of max_len.\n"," result = {\n"," k: [t[i : i + max_length] for i in range(0, total_length, max_length)]\n"," for k, t in concatenated_examples.items()\n"," }\n"," return result\n","\n","# Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a\n","# remainder for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value\n","# might be slower to preprocess.\n","#\n","# To speed up this part, we use multiprocessing. 
See the documentation of the map method for more information:\n","# https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map\n","if not truncate_longer_samples:\n"," train_dataset = train_dataset.map(group_texts, batched=True,\n"," desc=f\"Grouping texts in chunks of {max_length}\")\n"," test_dataset = test_dataset.map(group_texts, batched=True,\n"," desc=f\"Grouping texts in chunks of {max_length}\")\n"," # convert them from lists to torch tensors\n"," train_dataset.set_format(\"torch\")\n"," test_dataset.set_format(\"torch\")"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"dZ0oYZbk-SSh","outputId":"bf5b60bb-917a-42b9-eba8-531fa86df0f9"},"outputs":[{"data":{"text/plain":["(643843, 71357)"]},"execution_count":null,"metadata":{},"output_type":"execute_result"}],"source":["len(train_dataset), len(test_dataset)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"Mslndt81024t"},"outputs":[],"source":["# initialize the model with the config\n","model_config = BertConfig(vocab_size=vocab_size, max_position_embeddings=max_length)\n","model = BertForMaskedLM(config=model_config)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"kmFCTByJ1OI3"},"outputs":[],"source":["# initialize the data collator, randomly masking 20% (default is 15%) of the tokens for the Masked Language\n","# Modeling (MLM) task\n","data_collator = DataCollatorForLanguageModeling(\n"," tokenizer=tokenizer, mlm=True, mlm_probability=0.2\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"IKJdnkAd1uYT","outputId":"81928d26-95d6-4805-a180-683af3a88a2e"},"outputs":[{"name":"stderr","output_type":"stream","text":["using `logging_steps` to initialize `eval_steps` to 1000\n","PyTorch: setting up devices\n","The default value for the training argument `--report_to` will change in v5 
(from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"]}],"source":["training_args = TrainingArguments(\n"," output_dir=model_path, # output directory where model checkpoints are saved\n"," evaluation_strategy=\"steps\", # evaluate every `logging_steps` steps\n"," overwrite_output_dir=True, \n"," num_train_epochs=10, # number of training epochs, feel free to tweak\n"," per_device_train_batch_size=10, # the training batch size, put it as high as your GPU memory fits\n"," gradient_accumulation_steps=8, # accumulating the gradients before updating the weights\n"," per_device_eval_batch_size=64, # evaluation batch size\n"," logging_steps=1000, # evaluate, log and save model checkpoints every 1000 steps\n"," save_steps=1000,\n"," # load_best_model_at_end=True, # whether to load the best model (in terms of loss) at the end of training\n"," # save_total_limit=3, # if you don't have much disk space, keep only the 3 most recent checkpoints on disk\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"OMKVmXZN2o7c"},"outputs":[],"source":["# initialize the trainer and pass everything to it\n","trainer = Trainer(\n"," model=model,\n"," args=training_args,\n"," data_collator=data_collator,\n"," train_dataset=train_dataset,\n"," eval_dataset=test_dataset,\n",")"]},{"cell_type":"code","execution_count":21,"metadata":{"id":"HYsgN58E2tFD","colab":{"base_uri":"https://localhost:8080/","height":1000},"outputId":"bd4a522a-4fd4-4d4f-fce6-a9fc0cb4cbef","executionInfo":{"status":"error","timestamp":1654244955088,"user_tz":-60,"elapsed":1233370,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"}}},"outputs":[{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["The following columns in the training set don't have a corresponding argument in `BertForMaskedLM.forward` 
and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n"," FutureWarning,\n","***** Running training *****\n"," Num examples = 643843\n"," Num Epochs = 10\n"," Instantaneous batch size per device = 10\n"," Total train batch size (w. parallel, distributed & accumulation) = 80\n"," Gradient Accumulation steps = 8\n"," Total optimization steps = 80480\n"]},{"data":{"text/html":["\n","
\n"," \n"," \n"," [ 6001/80480 10:33:18 < 131:02:39, 0.16 it/s, Epoch 0.75/10]\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StepTraining LossValidation Loss
10006.8608006.550845
20006.5187006.451167
30006.4317006.387487
40006.3766006.341373
50006.3323006.307063

\n","

\n"," \n"," \n"," [ 356/1115 07:19 < 15:40, 0.81 it/s]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["The following columns in the evaluation set don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","***** Running Evaluation *****\n"," Num examples = 71357\n"," Batch size = 64\n","Saving model checkpoint to pretrained-bert/checkpoint-1000\n","Configuration saved in pretrained-bert/checkpoint-1000/config.json\n","Model weights saved in pretrained-bert/checkpoint-1000/pytorch_model.bin\n","The following columns in the evaluation set don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","***** Running Evaluation *****\n"," Num examples = 71357\n"," Batch size = 64\n","Saving model checkpoint to pretrained-bert/checkpoint-2000\n","Configuration saved in pretrained-bert/checkpoint-2000/config.json\n","Model weights saved in pretrained-bert/checkpoint-2000/pytorch_model.bin\n","The following columns in the evaluation set don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","***** Running Evaluation *****\n"," Num examples = 71357\n"," Batch size = 64\n","Saving model checkpoint to pretrained-bert/checkpoint-3000\n","Configuration saved in pretrained-bert/checkpoint-3000/config.json\n","Model weights saved in pretrained-bert/checkpoint-3000/pytorch_model.bin\n","The following columns in the evaluation set don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: special_tokens_mask. 
If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","***** Running Evaluation *****\n"," Num examples = 71357\n"," Batch size = 64\n","Saving model checkpoint to pretrained-bert/checkpoint-4000\n","Configuration saved in pretrained-bert/checkpoint-4000/config.json\n","Model weights saved in pretrained-bert/checkpoint-4000/pytorch_model.bin\n","The following columns in the evaluation set don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","***** Running Evaluation *****\n"," Num examples = 71357\n"," Batch size = 64\n","Saving model checkpoint to pretrained-bert/checkpoint-5000\n","Configuration saved in pretrained-bert/checkpoint-5000/config.json\n","Model weights saved in pretrained-bert/checkpoint-5000/pytorch_model.bin\n","The following columns in the evaluation set don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `BertForMaskedLM.forward`, you can safely ignore this message.\n","***** Running Evaluation *****\n"," Num examples = 71357\n"," Batch size = 64\n"]},{"output_type":"display_data","data":{"text/plain":[""],"text/html":["\n","
\n"," \n"," \n"," [ 6056/80480 11:01:09 < 135:27:46, 0.15 it/s, Epoch 0.75/10]\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StepTraining LossValidation Loss
10006.8608006.550845
20006.5187006.451167
30006.4317006.387487
40006.3766006.341373
50006.3323006.307063
60006.2989006.275374

"]},"metadata":{}},{"output_type":"stream","name":"stderr","text":["Saving model checkpoint to pretrained-bert/checkpoint-6000\n","Configuration saved in pretrained-bert/checkpoint-6000/config.json\n","Model weights saved in pretrained-bert/checkpoint-6000/pytorch_model.bin\n"]},{"output_type":"error","ename":"KeyboardInterrupt","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# train the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1420\u001b[0m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1422\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1423\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1424\u001b[0m if (\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2027\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2028\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2029\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2030\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2031\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/_tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 361\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 362\u001b[0m inputs=inputs)\n\u001b[0;32m--> 363\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 173\u001b[0m Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n\u001b[1;32m 174\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 175\u001b[0;31m allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m def grad(\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}],"source":["# train the model\n","trainer.train()"]},{"cell_type":"code","execution_count":25,"metadata":{"id":"dUZSRAxV2vp-","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1654245052073,"user_tz":-60,"elapsed":1995,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"}},"outputId":"9aac4c86-199d-4ba3-9b79-614ba8c97fe1"},"outputs":[{"output_type":"stream","name":"stderr","text":["loading configuration file pretrained-bert/checkpoint-6000/config.json\n","Model config BertConfig {\n"," \"architectures\": 
[\n"," \"BertForMaskedLM\"\n"," ],\n"," \"attention_probs_dropout_prob\": 0.1,\n"," \"classifier_dropout\": null,\n"," \"hidden_act\": \"gelu\",\n"," \"hidden_dropout_prob\": 0.1,\n"," \"hidden_size\": 768,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 3072,\n"," \"layer_norm_eps\": 1e-12,\n"," \"max_position_embeddings\": 512,\n"," \"model_type\": \"bert\",\n"," \"num_attention_heads\": 12,\n"," \"num_hidden_layers\": 12,\n"," \"pad_token_id\": 0,\n"," \"position_embedding_type\": \"absolute\",\n"," \"torch_dtype\": \"float32\",\n"," \"transformers_version\": \"4.18.0\",\n"," \"type_vocab_size\": 2,\n"," \"use_cache\": true,\n"," \"vocab_size\": 30522\n","}\n","\n","loading weights file pretrained-bert/checkpoint-6000/pytorch_model.bin\n","All model checkpoint weights were used when initializing BertForMaskedLM.\n","\n","All the weights of BertForMaskedLM were initialized from the model checkpoint at pretrained-bert/checkpoint-6000.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForMaskedLM for predictions without further training.\n","Didn't find file pretrained-bert/tokenizer.json. We won't load it.\n","Didn't find file pretrained-bert/added_tokens.json. We won't load it.\n","Didn't find file pretrained-bert/special_tokens_map.json. We won't load it.\n","Didn't find file pretrained-bert/tokenizer_config.json. 
We won't load it.\n","loading file pretrained-bert/vocab.txt\n","loading file None\n","loading file None\n","loading file None\n","loading file None\n","loading configuration file pretrained-bert/config.json\n","Model config BertConfig {\n"," \"_name_or_path\": \"pretrained-bert\",\n"," \"attention_probs_dropout_prob\": 0.1,\n"," \"classifier_dropout\": null,\n"," \"cls_token\": \"[CLS]\",\n"," \"do_lower_case\": true,\n"," \"hidden_act\": \"gelu\",\n"," \"hidden_dropout_prob\": 0.1,\n"," \"hidden_size\": 768,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 3072,\n"," \"layer_norm_eps\": 1e-12,\n"," \"mask_token\": \"[MASK]\",\n"," \"max_len\": 512,\n"," \"max_position_embeddings\": 512,\n"," \"model_max_length\": 512,\n"," \"model_type\": \"bert\",\n"," \"num_attention_heads\": 12,\n"," \"num_hidden_layers\": 12,\n"," \"pad_token\": \"[PAD]\",\n"," \"pad_token_id\": 0,\n"," \"position_embedding_type\": \"absolute\",\n"," \"sep_token\": \"[SEP]\",\n"," \"transformers_version\": \"4.18.0\",\n"," \"type_vocab_size\": 2,\n"," \"unk_token\": \"[UNK]\",\n"," \"use_cache\": true,\n"," \"vocab_size\": 30522\n","}\n","\n","loading configuration file pretrained-bert/config.json\n","Model config BertConfig {\n"," \"_name_or_path\": \"pretrained-bert\",\n"," \"attention_probs_dropout_prob\": 0.1,\n"," \"classifier_dropout\": null,\n"," \"cls_token\": \"[CLS]\",\n"," \"do_lower_case\": true,\n"," \"hidden_act\": \"gelu\",\n"," \"hidden_dropout_prob\": 0.1,\n"," \"hidden_size\": 768,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 3072,\n"," \"layer_norm_eps\": 1e-12,\n"," \"mask_token\": \"[MASK]\",\n"," \"max_len\": 512,\n"," \"max_position_embeddings\": 512,\n"," \"model_max_length\": 512,\n"," \"model_type\": \"bert\",\n"," \"num_attention_heads\": 12,\n"," \"num_hidden_layers\": 12,\n"," \"pad_token\": \"[PAD]\",\n"," \"pad_token_id\": 0,\n"," \"position_embedding_type\": \"absolute\",\n"," \"sep_token\": \"[SEP]\",\n"," 
\"transformers_version\": \"4.18.0\",\n"," \"type_vocab_size\": 2,\n"," \"unk_token\": \"[UNK]\",\n"," \"use_cache\": true,\n"," \"vocab_size\": 30522\n","}\n","\n"]}],"source":["# when you load from pretrained\n","model = BertForMaskedLM.from_pretrained(os.path.join(model_path, \"checkpoint-6000\"))\n","tokenizer = BertTokenizerFast.from_pretrained(model_path)\n","# or simply use pipeline\n","fill_mask = pipeline(\"fill-mask\", model=model, tokenizer=tokenizer)"]},{"cell_type":"code","execution_count":27,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":434,"status":"ok","timestamp":1654245081424,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"},"user_tz":-60},"id":"vJO-1w15ARHs","outputId":"346b2c7b-d65b-44f1-9fca-e3493435aca2"},"outputs":[{"output_type":"stream","name":"stdout","text":["{'score': 0.06537885963916779, 'token': 1556, 'token_str': 'the', 'sequence': 'it is known that the is the capital of germany'}\n","{'score': 0.036817438900470734, 'token': 20, 'token_str': '.', 'sequence': 'it is known that. 
is the capital of germany'}\n","{'score': 0.0335884727537632, 'token': 18, 'token_str': ',', 'sequence': 'it is known that, is the capital of germany'}\n","{'score': 0.027838902547955513, 'token': 1573, 'token_str': 'of', 'sequence': 'it is known that of is the capital of germany'}\n","{'score': 0.027804739773273468, 'token': 1609, 'token_str': 'is', 'sequence': 'it is known that is is the capital of germany'}\n"]}],"source":["# perform predictions\n","example = \"It is known that [MASK] is the capital of Germany\"\n","for prediction in fill_mask(example):\n"," print(prediction)"]},{"cell_type":"code","source":["# perform predictions\n","examples = [\n"," \"Today's most trending hashtags on [MASK] is Donald Trump\",\n"," \"The [MASK] was cloudy yesterday, but today it's rainy.\",\n","]\n","for example in examples:\n"," for prediction in fill_mask(example):\n"," print(f\"{prediction['sequence']}, confidence: {prediction['score']}\")\n"," print(\"=\"*50)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8ROoCqpssCb9","executionInfo":{"status":"ok","timestamp":1654245056254,"user_tz":-60,"elapsed":472,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"}},"outputId":"cb795c9c-b77d-42ed-c779-0cf963fcddd2"},"execution_count":26,"outputs":[{"output_type":"stream","name":"stdout","text":["today's most trending hashtags on trump is donald trump, confidence: 0.05097166821360588\n","today's most trending hashtags on. is donald trump, confidence: 0.04177526384592056\n","today's most trending hashtags on'is donald trump, confidence: 0.040809836238622665\n","today's most trending hashtags on the is donald trump, confidence: 0.03832641988992691\n","today's most trending hashtags on, is donald trump, confidence: 0.024022724479436874\n","==================================================\n","the. 
was cloudy yesterday, but today it's rainy., confidence: 0.0627809464931488\n","the the was cloudy yesterday, but today it's rainy., confidence: 0.0463297963142395\n","the, was cloudy yesterday, but today it's rainy., confidence: 0.03323638439178467\n","the to was cloudy yesterday, but today it's rainy., confidence: 0.025685036554932594\n","the'was cloudy yesterday, but today it's rainy., confidence: 0.024147875607013702\n","==================================================\n"]}]},{"cell_type":"code","execution_count":28,"metadata":{"id":"gGkOvmFaYkF2","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1654245174475,"user_tz":-60,"elapsed":21,"user":{"displayName":"Abdou Rockikz","userId":"01706581904475440028"}},"outputId":"8deff2cf-85dd-42ef-eb1d-4a03a78cc9fc"},"outputs":[{"output_type":"stream","name":"stdout","text":["Fri Jun 3 08:32:51 2022 \n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n","|-------------------------------+----------------------+----------------------+\n","| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. |\n","|===============================+======================+======================|\n","| 0 Tesla P100-PCIE... 
Off | 00000000:00:04.0 Off | 0 |\n","| N/A 52C P0 38W / 250W | 14725MiB / 16280MiB | 0% Default |\n","| | | N/A |\n","+-------------------------------+----------------------+----------------------+\n"," \n","+-----------------------------------------------------------------------------+\n","| Processes: |\n","| GPU GI CI PID Type Process name GPU Memory |\n","| ID ID Usage |\n","|=============================================================================|\n","+-----------------------------------------------------------------------------+\n"]}],"source":["!nvidia-smi"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":[],"name":"PretrainingBERT_PythonCodeTutorial.ipynb","provenance":[],"authorship_tag":"ABX9TyNWY6XaAImHlWbcZBWbZnfu"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"123f86c229c24496979269c09256d1e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6d6b854ddcbc4113b941c8ba804e2877","placeholder":"​","style":"IPY_MODEL_e4be24ca306d4a5c8d4a8a1718225590","value":"Generating train split: 
100%"}},"16fd5817ade84d92abeebb70952c926f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"241cca42438046aea2a9b4874f37c8b1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"2e10e57221ef46d695eb16fd25ec5e49":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2f22301fd2984090be8adb6fe839e393":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e33fa1b90e9947e8858db2ef44969e37","placeholder":"​","style":"IPY_MODEL_f6363254e1fd40b88ec971851ad7e441","value":"Downloading builder script: 
"}},"34e85e0a8cf448828e27cb266626cb27":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5837dd25ab0645939444b94ab35e5db4","placeholder":"​","style":"IPY_MODEL_d78152622ecf4f3da35756557a802251","value":" 845M/845M [00:24<00:00, 37.4MB/s]"}},"3748fb75842f4392b40fbfab0b7c9caa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_bc97183430e34db4b073305ce07d6f41","max":71,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c082e56c91ce4bb4a4bb1e0b0001eaa2","value":71}},"3f27b9cc5f104665a99a996c7ab3af1c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c73ea971834643fab70be84563d06f6a","IPY_MODEL_653752175e3445ee8fd4651bd551b34d","IPY_MODEL_34e85e0a8cf448828e27cb266626cb27"],"layout":"IPY_MODEL_93b2d9dd8440496f8d1812993530dc05"}},"450625b8b8cb4ea18bd6e8d0807c0830":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_mo
del_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_123f86c229c24496979269c09256d1e6","IPY_MODEL_cdcc3c356d91458ba4be2f1a8b41f9da","IPY_MODEL_66e0498023a64a109f4e18e030937e5e"],"layout":"IPY_MODEL_bce52428773848faba37e3a41747b4e9"}},"4589cb842c7842ddb0e9bca6db71d590":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d84d85ce2d3f4dd491a44b97e653e175","placeholder":"​","style":"IPY_MODEL_54b4cf2d58ba4f87aec5070dbd1ff801","value":"100%"}},"4801d49b04044fa79f64afb3e4d0d89c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top"
:null,"visibility":null,"width":null}},"4aaff7ef487c4b5c915b2def2ab21759":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ba0b6327ac3740f79f66cb54d131f4fa","placeholder":"​","style":"IPY_MODEL_16fd5817ade84d92abeebb70952c926f","value":" 2.04k/? [00:00<00:00, 47.0kB/s]"}},"4df3ac00cfeb4441beb0c077578ce793":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e549c9c30ce44951b93b1f9d4d1cfca1","IPY_MODEL_9f7e7e08223343d5b78d5c2d8855640d","IPY_MODEL_4aaff7ef487c4b5c915b2def2ab21759"],"layout":"IPY_MODEL_99bbccd66c66489b96470d3e9caf1f1f"}},"50163d0ddc164a139121adf8f9310e36":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_40d2d394b8c24beaaa485d7c30dac2ac","IPY_MODEL_6a02439ddba246679fb53b91ccca4d2c","IPY_MODEL_1b57fe0adf5641ddb23713fa97cf28b6"],"layout":"IPY_MODEL_f36b2a7aa3944a5e856e5b17d286a488"}},"54b4cf2d58ba4f87aec5070dbd1ff801":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_mo
dule":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5837dd25ab0645939444b94ab35e5db4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"599a2e48109c4b25840754625c05af43":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"62fe563ea6a74aa59833ce78423213da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupy
ter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"653752175e3445ee8fd4651bd551b34d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_788f92dcba3f4148bc4e88b5c4f9b28b","max":845131146,"min":0,"orientation":"horizontal","style":"IPY_MODEL_cfcf5950147d45e0bc3c8689b5b76073","value":845131146}},"66e0498023a64a109f4e18e030937e5e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4801d49b04044fa79f64afb3e4d0d89c","placeholder":"​","style":"IPY_MODEL_599a2e48109c4b25840754625c05af43","value":" 708111/708241 [04:17<00:00, 2898.45 
examples/s]"}},"6aa0294bb5b741f49883998b69accaba":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6c082c2cd59f483981b4839dff47e071":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6d6b854ddcbc4113b941c8ba804e2877":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"788f92dcba3f4148bc4e88b5c4f9b28b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit
":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7a3d34b2e76a4d4b8b14ac5aefb3883f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7aa9b4e351a34a84a8aa6ad49aa5d74d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2f22301fd2984090be8adb6fe839e393","IPY_MODEL_98d3d993e19d4f448cbca02235a850ac","IPY_MODEL_8bec66cc71aa433ea55697d640988262"],"layout":"IPY_MODEL_b45af68d650f445d97876e9b51d3f15a"}},"89fdcc82f49f4ab2b498bb5d46f9b73b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_modul
e_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8bec66cc71aa433ea55697d640988262":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6aa0294bb5b741f49883998b69accaba","placeholder":"​","style":"IPY_MODEL_f6721fa0034043138705c565a4b77b77","value":" 4.38k/? 
[00:00<00:00, 82.1kB/s]"}},"938c3b47fef24ad48b0ace7e7dcfcd80":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6c082c2cd59f483981b4839dff47e071","placeholder":"​","style":"IPY_MODEL_62fe563ea6a74aa59833ce78423213da","value":" 71/71 [01:46<00:00, 1.42s/ba]"}},"93b2d9dd8440496f8d1812993530dc05":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"98d3d993e19d4f448cbca02235a850ac":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls
","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_89fdcc82f49f4ab2b498bb5d46f9b73b","max":1746,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d34ef4b4dfb540e28f878df61f27ff26","value":1746}},"99bbccd66c66489b96470d3e9caf1f1f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9f7e7e08223343d5b78d5c2d8855640d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a77ad1702bf7439f87f7b1084d278717","max":932,"min":0,"orientation":"horizontal","style":"IPY_MODEL_241cca42438046aea2a9b4874f37c8b1","value":932}},"a77ad1702bf7439f87f7b1084d27871
7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b45af68d650f445d97876e9b51d3f15a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,
"top":null,"visibility":null,"width":null}},"ba0b6327ac3740f79f66cb54d131f4fa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc97183430e34db4b073305ce07d6f41":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"ove
rflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bce52428773848faba37e3a41747b4e9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bed4e885cf5d4b82a38833820b8e118f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4589cb842c7842ddb0e9bca6db71d590","IPY_MODEL_3748fb75842f4392b40fbfab0b7c9caa","IPY_MODEL_938c3b47fef24ad48b0ace7e7dcfcd80"],"layout":"IPY_MODEL_f10afe04e61d4edeb33d8907a1192891"}},"c082792cfdde4faab6bea631addceb00":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel",
"_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c082e56c91ce4bb4a4bb1e0b0001eaa2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c73ea971834643fab70be84563d06f6a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fa06a799cfe8477a8e3a99a6dd99ca27","placeholder":"​","style":"IPY_MODEL_d4d1386f42534f8584d0c1e0428bd65b","value":"Downloading data: 
100%"}},"cdcc3c356d91458ba4be2f1a8b41f9da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_7a3d34b2e76a4d4b8b14ac5aefb3883f","max":708241,"min":0,"orientation":"horizontal","style":"IPY_MODEL_ffd1f3803c154f68b9b921cfefc00604","value":708241}},"cfcf5950147d45e0bc3c8689b5b76073":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d34ef4b4dfb540e28f878df61f27ff26":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d4d1386f42534f8584d0c1e0428bd65b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d78152622ecf4f3da35756557a802251":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0"
,"model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d84d85ce2d3f4dd491a44b97e653e175":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e33fa1b90e9947e8858db2ef44969e37":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_templ
ate_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e4be24ca306d4a5c8d4a8a1718225590":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e549c9c30ce44951b93b1f9d4d1cfca1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c082792cfdde4faab6bea631addceb00","placeholder":"​","style":"IPY_MODEL_2e10e57221ef46d695eb16fd25ec5e49","value":"Downloading metadata: 
"}},"f10afe04e61d4edeb33d8907a1192891":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f6363254e1fd40b88ec971851ad7e441":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f6721fa0034043138705c565a4b77b77":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fa06a799cfe8477a8e3a99a6dd99ca27":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state"
:{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffd1f3803c154f68b9b921cfefc00604":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}}}}},"nbformat":4,"nbformat_minor":0}