{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "c3af7c60-ba26-4f75-bbe9-664347299dca", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting transformers\n", " Downloading transformers-4.39.1-py3-none-any.whl.metadata (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting datasets\n", " Downloading datasets-2.18.0-py3-none-any.whl.metadata (20 kB)\n", "Collecting accelerate\n", " Downloading accelerate-0.28.0-py3-none-any.whl.metadata (18 kB)\n", "Requirement already satisfied: filelock in /usr/lib/python3/dist-packages (from transformers) (3.6.0)\n", "Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)\n", " Downloading huggingface_hub-0.22.1-py3-none-any.whl.metadata (12 kB)\n", "Requirement already satisfied: numpy>=1.17 in ./.local/lib/python3.10/site-packages (from transformers) (1.25.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/lib/python3/dist-packages (from transformers) (21.3)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from transformers) (5.4.1)\n", "Collecting regex!=2019.12.17 (from transformers)\n", " Downloading regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests in ./.local/lib/python3.10/site-packages (from transformers) (2.31.0)\n", "Collecting tokenizers<0.19,>=0.14 (from transformers)\n", " Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", "Collecting 
safetensors>=0.4.1 (from transformers)\n", " Downloading safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", "Requirement already satisfied: tqdm>=4.27 in ./.local/lib/python3.10/site-packages (from transformers) (4.66.1)\n", "Collecting pyarrow>=12.0.0 (from datasets)\n", " Downloading pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)\n", "Collecting pyarrow-hotfix (from datasets)\n", " Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\n", "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: pandas in /usr/lib/python3/dist-packages (from datasets) (1.3.5)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", "Collecting multiprocess (from datasets)\n", " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n", "Collecting fsspec<=2024.2.0,>=2023.1.0 (from fsspec[http]<=2024.2.0,>=2023.1.0->datasets)\n", " Downloading fsspec-2024.2.0-py3-none-any.whl.metadata (6.8 kB)\n", "Collecting aiohttp (from datasets)\n", " Downloading aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)\n", "Requirement already satisfied: psutil in /usr/lib/python3/dist-packages (from accelerate) (5.9.0)\n", "Requirement already satisfied: torch>=1.10.0 in /usr/lib/python3/dist-packages (from accelerate) (2.0.1)\n", "Collecting aiosignal>=1.1.2 (from aiohttp->datasets)\n", " Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)\n", "Requirement already satisfied: attrs>=17.3.0 in ./.local/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\n", "Collecting frozenlist>=1.1.1 (from aiohttp->datasets)\n", " Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", 
"Collecting multidict<7.0,>=4.5 (from aiohttp->datasets)\n", " Downloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n", "Collecting yarl<2.0,>=1.0 (from aiohttp->datasets)\n", " Downloading yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n", "Collecting async-timeout<5.0,>=4.0 (from aiohttp->datasets)\n", " Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in ./.local/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.8.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in ./.local/lib/python3.10/site-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->transformers) (3.3)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/lib/python3/dist-packages (from requests->transformers) (1.26.5)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->transformers) (2020.6.20)\n", "Downloading transformers-4.39.1-py3-none-any.whl (8.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m208.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading datasets-2.18.0-py3-none-any.whl (510 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m80.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading accelerate-0.28.0-py3-none-any.whl (290 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.1/290.1 kB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.2.0-py3-none-any.whl (170 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m170.9/170.9 kB\u001b[0m \u001b[31m33.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m136.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading huggingface_hub-0.22.1-py3-none-any.whl (388 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m388.6/388.6 kB\u001b[0m \u001b[31m66.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m123.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (773 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m774.0/774.0 kB\u001b[0m \u001b[31m97.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m125.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m194.9 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m30.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", "Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m50.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", "Downloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n", "Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (239 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.5/239.5 kB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (124 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.3/124.3 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (301 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.6/301.6 kB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h\u001b[33mDEPRECATION: flatbuffers 1.12.1-git20200711.33e2d80-dfsg1-0.6 has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of flatbuffers or contact the author to suggest that they release a version with a conforming version number. 
Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", "\u001b[0mInstalling collected packages: xxhash, safetensors, regex, pyarrow-hotfix, pyarrow, multidict, fsspec, frozenlist, dill, async-timeout, yarl, multiprocess, huggingface-hub, aiosignal, tokenizers, aiohttp, accelerate, transformers, datasets\n", "Successfully installed accelerate-0.28.0 aiohttp-3.9.3 aiosignal-1.3.1 async-timeout-4.0.3 datasets-2.18.0 dill-0.3.8 frozenlist-1.4.1 fsspec-2024.2.0 huggingface-hub-0.22.1 multidict-6.0.5 multiprocess-0.70.16 pyarrow-15.0.2 pyarrow-hotfix-0.6 regex-2023.12.25 safetensors-0.4.2 tokenizers-0.15.2 transformers-4.39.1 xxhash-3.4.1 yarl-1.9.4\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "! 
pip install transformers datasets accelerate" ] }, { "cell_type": "code", "execution_count": 2, "id": "0c24abf0-926e-4c37-9713-58dffe06ed03", "metadata": {}, "outputs": [], "source": [ "GLUE_TASKS = [\"cola\", \"mnli\", \"mnli-mm\", \"mrpc\", \"qnli\", \"qqp\", \"rte\", \"sst2\", \"stsb\", \"wnli\"]" ] }, { "cell_type": "code", "execution_count": 3, "id": "390d5322-3f72-49e5-b001-f66d943f0c2c", "metadata": {}, "outputs": [], "source": [ "task = \"cola\"\n", "model_checkpoint = \"distilbert-base-uncased\"\n", "batch_size = 16" ] }, { "cell_type": "code", "execution_count": 4, "id": "bece75f9-a5a2-45a6-aef0-33a2fafd6262", "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset, load_metric" ] }, { "cell_type": "code", "execution_count": 5, "id": "a3bfef60-bd97-434e-9b83-560687ad4c08", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1316f9ea215b4c99b67f5278ac5061fd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/35.3k [00:00=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "Downloading data: 100%|██████████| 251k/251k [00:00<00:00, 1.00MB/s]\n", "Downloading data: 100%|██████████| 37.6k/37.6k [00:00<00:00, 251kB/s]\n", "Downloading data: 100%|██████████| 37.7k/37.7k [00:00<00:00, 242kB/s]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a77c4b8db75c41bfbc994e0ecaf908cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0%| | 0/8551 [00:00\n", " \n", " \n", " \n", " sentence\n", " label\n", " idx\n", " \n", " \n", " \n", " \n", " 0\n", " Mary jumped the horse perfectly over the last fence.\n", " acceptable\n", " 705\n", " \n", " \n", " 1\n", " John taught new students English Syntax.\n", " acceptable\n", " 3951\n", " \n", " \n", " 2\n", " This doll is hard to see it.\n", " 
unacceptable\n", " 5018\n", " \n", " \n", " 3\n", " I whipped the eggs from a puddle into a froth.\n", " unacceptable\n", " 2298\n", " \n", " \n", " 4\n", " Bill wants John to leave.\n", " acceptable\n", " 6157\n", " \n", " \n", " 5\n", " John expect to must leave.\n", " unacceptable\n", " 4481\n", " \n", " \n", " 6\n", " Bill's mother saw him.\n", " acceptable\n", " 7569\n", " \n", " \n", " 7\n", " Once Janet left, Fred became all the crazier.\n", " acceptable\n", " 226\n", " \n", " \n", " 8\n", " He's too reliable a man.\n", " acceptable\n", " 5440\n", " \n", " \n", " 9\n", " I wonder if she used paints.\n", " acceptable\n", " 7425\n", " \n", " \n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_random_elements(dataset[\"train\"])" ] }, { "cell_type": "code", "execution_count": 8, "id": "ce74eb02-1bf1-4ce9-b9f9-34ed0d7d1f8f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'matthews_correlation': 0.0416070055112537}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "fake_preds = np.random.randint(0, 2, size=(64,))\n", "fake_labels = np.random.randint(0, 2, size=(64,))\n", "metric.compute(predictions=fake_preds, references=fake_labels)" ] }, { "cell_type": "code", "execution_count": 9, "id": "f5bd6db5-8786-477b-89a6-7ca21414f4ec", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9f5d7bb9f48b4c6b816427eeb8b5fe5d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/28.0 [00:00\n", " \n", " \n", " [2675/2675 01:14, Epoch 5/5]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.5190000.4722180.430751
20.3498000.5021730.535758
30.2382000.6178000.541004
40.1734000.7442480.549477
50.1278000.8032360.550403

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=2675, training_loss=0.27159803158768986, metrics={'train_runtime': 75.2661, 'train_samples_per_second': 568.051, 'train_steps_per_second': 35.541, 'total_flos': 229000686898068.0, 'train_loss': 0.27159803158768986, 'epoch': 5.0})" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()" ] }, { "cell_type": "code", "execution_count": 22, "id": "e4106e5c-a37d-4e8f-b880-339e42daf57f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [66/66 00:00]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'eval_loss': 0.8032358288764954,\n", " 'eval_matthews_correlation': 0.5504031254980248,\n", " 'eval_runtime': 0.3257,\n", " 'eval_samples_per_second': 3201.883,\n", " 'eval_steps_per_second': 202.612,\n", " 'epoch': 5.0}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.evaluate()" ] }, { "cell_type": "code", "execution_count": 23, "id": "703d1296-ce54-4281-b7d3-d487e545343a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting optuna\n", " Downloading optuna-3.6.0-py3-none-any.whl.metadata (17 kB)\n", "Collecting alembic>=1.5.0 (from optuna)\n", " Downloading alembic-1.13.1-py3-none-any.whl.metadata (7.4 kB)\n", "Collecting colorlog (from optuna)\n", " Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: numpy in ./.local/lib/python3.10/site-packages (from optuna) (1.25.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/lib/python3/dist-packages (from optuna) (21.3)\n", "Collecting sqlalchemy>=1.3.0 (from optuna)\n", " Downloading SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n", "Requirement already satisfied: tqdm in ./.local/lib/python3.10/site-packages (from optuna) (4.66.1)\n", "Requirement already satisfied: PyYAML in /usr/lib/python3/dist-packages (from optuna) 
(5.4.1)\n", "Collecting Mako (from alembic>=1.5.0->optuna)\n", " Downloading Mako-1.3.2-py3-none-any.whl.metadata (2.9 kB)\n", "Requirement already satisfied: typing-extensions>=4 in ./.local/lib/python3.10/site-packages (from alembic>=1.5.0->optuna) (4.8.0)\n", "Collecting greenlet!=0.4.17 (from sqlalchemy>=1.3.0->optuna)\n", " Downloading greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n", "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/lib/python3/dist-packages (from Mako->alembic>=1.5.0->optuna) (2.0.1)\n", "Downloading optuna-3.6.0-py3-none-any.whl (379 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m379.9/379.9 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading alembic-1.13.1-py3-none-any.whl (233 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.4/233.4 kB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m209.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)\n", "Downloading greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (616 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m616.0/616.0 kB\u001b[0m \u001b[31m127.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading Mako-1.3.2-py3-none-any.whl (78 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.7/78.7 kB\u001b[0m \u001b[31m25.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h\u001b[33mDEPRECATION: flatbuffers 1.12.1-git20200711.33e2d80-dfsg1-0.6 has a non-standard version 
number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of flatbuffers or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", "\u001b[0mInstalling collected packages: Mako, greenlet, colorlog, sqlalchemy, alembic, optuna\n", "Successfully installed Mako-1.3.2 alembic-1.13.1 colorlog-6.8.2 greenlet-3.0.3 optuna-3.6.0 sqlalchemy-2.0.29\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting ray[tune]\n", " Downloading ray-2.10.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (13 kB)\n", "Requirement already satisfied: click>=7.0 in /usr/lib/python3/dist-packages (from ray[tune]) (8.0.3)\n", "Requirement already satisfied: filelock in /usr/lib/python3/dist-packages (from ray[tune]) (3.6.0)\n", "Requirement already satisfied: jsonschema in ./.local/lib/python3.10/site-packages (from ray[tune]) (4.20.0)\n", "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/lib/python3/dist-packages (from ray[tune]) (1.0.3)\n", "Requirement already satisfied: packaging in /usr/lib/python3/dist-packages (from ray[tune]) (21.3)\n", "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /usr/lib/python3/dist-packages (from ray[tune]) (4.21.12)\n", "Requirement already satisfied: pyyaml in /usr/lib/python3/dist-packages (from ray[tune]) (5.4.1)\n", "Requirement already satisfied: aiosignal in ./.local/lib/python3.10/site-packages (from ray[tune]) (1.3.1)\n", "Requirement already satisfied: frozenlist in ./.local/lib/python3.10/site-packages (from ray[tune]) (1.4.1)\n", "Requirement already satisfied: requests in ./.local/lib/python3.10/site-packages (from ray[tune]) (2.31.0)\n", "Requirement already satisfied: pandas in /usr/lib/python3/dist-packages (from ray[tune]) (1.3.5)\n", "Collecting tensorboardX>=1.9 (from ray[tune])\n", " Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)\n", "Requirement already satisfied: pyarrow>=6.0.1 in ./.local/lib/python3.10/site-packages (from ray[tune]) (15.0.2)\n", "Requirement already satisfied: fsspec in 
./.local/lib/python3.10/site-packages (from ray[tune]) (2024.2.0)\n", "Requirement already satisfied: numpy<2,>=1.16.6 in ./.local/lib/python3.10/site-packages (from pyarrow>=6.0.1->ray[tune]) (1.25.2)\n", "Requirement already satisfied: attrs>=22.2.0 in ./.local/lib/python3.10/site-packages (from jsonschema->ray[tune]) (23.1.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in ./.local/lib/python3.10/site-packages (from jsonschema->ray[tune]) (2023.11.2)\n", "Requirement already satisfied: referencing>=0.28.4 in ./.local/lib/python3.10/site-packages (from jsonschema->ray[tune]) (0.31.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in ./.local/lib/python3.10/site-packages (from jsonschema->ray[tune]) (0.13.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in ./.local/lib/python3.10/site-packages (from requests->ray[tune]) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->ray[tune]) (3.3)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/lib/python3/dist-packages (from requests->ray[tune]) (1.26.5)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->ray[tune]) (2020.6.20)\n", "Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ray-2.10.0-cp310-cp310-manylinux2014_x86_64.whl (65.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.1/65.1 MB\u001b[0m \u001b[31m97.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25h\u001b[33mDEPRECATION: flatbuffers 1.12.1-git20200711.33e2d80-dfsg1-0.6 has a non-standard version number. pip 24.0 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of flatbuffers or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", "\u001b[0mInstalling collected packages: tensorboardX, ray\n", "Successfully installed ray-2.10.0 tensorboardX-2.6.2.2\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "# Hyperparameter-search backends. Versions are pinned to the ones the recorded run installed.\n", "# %pip (not `! pip`) installs into the running kernel's environment; the extras spec is quoted\n", "# so the shell cannot glob-expand the square brackets.\n", "%pip install optuna==3.6.0\n", "%pip install \"ray[tune]==2.10.0\"" ] }, { "cell_type": "code", "execution_count": 24, "id": "fae555d4-8640-4a81-9b49-4a9d9a5ab9b5", "metadata": {}, "outputs": [], "source": [ "def model_init():\n", "    \"\"\"Load a new sequence-classification model from `model_checkpoint`.\n", "\n", "    Intended to be passed as `Trainer(model_init=...)`: every call goes back to\n", "    `from_pretrained`, so the returned model never carries over weights from a\n", "    previous training run.\n", "\n", "    Returns:\n", "        The model produced by `AutoModelForSequenceClassification.from_pretrained`,\n", "        loaded with `num_labels=num_labels`.\n", "    \"\"\"\n", "    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)" ] }, { "cell_type": "code", "execution_count": 25, "id": "ac0f793c-8418-48d1-9b37-41005f0095c3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). 
Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "trainer = Trainer(\n", " model_init=model_init,\n", " args=args,\n", " train_dataset=encoded_dataset[\"train\"],\n", " eval_dataset=encoded_dataset[validation_key],\n", " tokenizer=tokenizer,\n", " compute_metrics=compute_metrics\n", ")" ] }, { "cell_type": "code", "execution_count": 26, "id": "7d74518a-ebc0-43ac-accb-65c32d5ec118", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:07:46,609] A new study created in memory with name: no-name-f7c7ff48-4767-4715-9c09-9c4565193c42\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). 
Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [2140/2140 00:59, Epoch 4/4]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.5686000.5282860.318150
20.3905000.5648420.387962
30.2373000.7255520.436872
40.1391000.9738280.429154

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:08:46,135] Trial 0 finished with value: 0.42915398713994973 and parameters: {'learning_rate': 6.658969020177832e-05, 'num_train_epochs': 4, 'seed': 11, 'per_device_train_batch_size': 16}. Best is trial 0 with value: 0.42915398713994973.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [402/402 00:26, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
1No log0.5311860.332502
2No log0.5037170.443275
3No log0.5079680.439255

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:09:13,247] Trial 1 finished with value: 0.4392548203439382 and parameters: {'learning_rate': 1.1290628476063563e-05, 'num_train_epochs': 3, 'seed': 28, 'per_device_train_batch_size': 64}. Best is trial 1 with value: 0.4392548203439382.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [8552/8552 03:23, Epoch 4/4]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.5313000.5669700.414967
20.5124000.7862950.472533
30.3817000.9049490.502075
40.2726001.0147110.494873

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:12:37,216] Trial 2 finished with value: 0.4948726793760845 and parameters: {'learning_rate': 8.36801127282771e-06, 'num_train_epochs': 4, 'seed': 12, 'per_device_train_batch_size': 4}. Best is trial 2 with value: 0.4948726793760845.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [536/536 00:21, Epoch 2/2]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
1No log0.4792860.436850
20.4148000.5203290.502552

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:12:59,219] Trial 3 finished with value: 0.5025517897100551 and parameters: {'learning_rate': 9.440074279431108e-05, 'num_train_epochs': 2, 'seed': 17, 'per_device_train_batch_size': 32}. Best is trial 3 with value: 0.5025517897100551.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [535/535 00:14, Epoch 1/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.6150000.6030500.000000

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/lib/python3/dist-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in scalar divide\n", " mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n", "[I 2024-03-27 11:13:14,620] Trial 4 finished with value: 0.0 and parameters: {'learning_rate': 1.8300985987395685e-06, 'num_train_epochs': 1, 'seed': 13, 'per_device_train_batch_size': 16}. Best is trial 3 with value: 0.5025517897100551.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [ 2138/10690 00:50 < 03:20, 42.59 it/s, Epoch 1/5]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.5351000.5739250.380639

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:14:05,400] Trial 5 pruned. \n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [134/402 00:08 < 00:16, 16.04 it/s, Epoch 1/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
1No log0.5986330.000000

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/lib/python3/dist-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in scalar divide\n", " mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n", "[I 2024-03-27 11:14:14,176] Trial 6 pruned. \n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [1069/1069 00:26, Epoch 1/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.5038000.5273980.379181

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:14:40,919] Trial 7 finished with value: 0.37918052306046424 and parameters: {'learning_rate': 1.0727131909090178e-05, 'num_train_epochs': 1, 'seed': 37, 'per_device_train_batch_size': 8}. Best is trial 3 with value: 0.5025517897100551.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [2138/2138 00:52, Epoch 2/2]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
10.5280000.5113690.389045
20.3579000.6386030.463981

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:15:33,685] Trial 8 finished with value: 0.46398061315082145 and parameters: {'learning_rate': 4.810569035434538e-05, 'num_train_epochs': 2, 'seed': 11, 'per_device_train_batch_size': 8}. Best is trial 3 with value: 0.5025517897100551.\n", "/home/ubuntu/.local/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n", "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n", " warnings.warn(\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [268/804 00:09 < 00:20, 26.67 it/s, Epoch 1/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
1No log0.5715600.046356

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[I 2024-03-27 11:15:44,118] Trial 9 pruned. \n" ] } ], "source": [ "best_run = trainer.hyperparameter_search(n_trials=10, direction=\"maximize\")" ] }, { "cell_type": "code", "execution_count": 27, "id": "ce0ebef8-3a96-4401-a62b-1771b2a68b24", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BestRun(run_id='3', objective=0.5025517897100551, hyperparameters={'learning_rate': 9.440074279431108e-05, 'num_train_epochs': 2, 'seed': 17, 'per_device_train_batch_size': 32}, run_summary=None)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "best_run" ] }, { "cell_type": "code", "execution_count": 28, "id": "efba4c29-56d3-459f-836e-ead6ec4c179f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [536/536 00:21, Epoch 2/2]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossMatthews Correlation
1No log0.4792860.436850
20.4148000.5203290.502552

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=536, training_loss=0.40565217964684785, metrics={'train_runtime': 21.0572, 'train_samples_per_second': 812.168, 'train_steps_per_second': 25.454, 'total_flos': 153655196855484.0, 'train_loss': 0.40565217964684785, 'epoch': 2.0})" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for n,v in best_run.hyperparameters.items():\n", " setattr(trainer.args, n, v)\n", "\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "id": "06baa2a0-6d79-4e2e-ad8e-d67ec1ed8c57", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }