Upload 2 files
Browse files
lecture_midm_7B_food_order_understanding_v1_2 (1).ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cells":[{"cell_type":"markdown","metadata":{"id":"28e4c4d1-a73f-437b-a1bd-c2cc3874924a"},"source":["# ๊ฐ์ 11์ฃผ์ฐจ: midm-food-order-understanding\n","\n","1. KT-AI/midm-bitext-S-7B-inst-v1 ๋ฅผ ์ฃผ๋ฌธ ๋ฌธ์ฅ ์ดํด์ ๋ฏธ์ธ ํ๋\n","\n","- food-order-understanding-small-3200.json (ํ์ต)\n","- food-order-understanding-small-800.json (๊ฒ์ฆ)\n","\n","\n","์ข
์์ ์ธ ํ์ ๋ด์ฉ\n","- huggingface ๊ณ์ ์ค์ ๋ฐ llama-2 ์ฌ์ฉ ์น์ธ\n","- ๋ก๊น
์ ์ํ wandb\n","\n","\n","history\n","\n","v1.2\n","- KT-AI/midm-bitext-S-7B-inst-v1 ์ safetensors ํฌ๋งท์ด ์ฌ๋ผ์๊ธฐ์, ํด๋น ๋ฆฌํฌ์์ ๋ฐ๋๋ก ์ค์ ๋ณ๊ฒฝ\n","- ์ ์ฒด ๊ณผ์ ์ฌ๊ฒ์ฆ"],"id":"28e4c4d1-a73f-437b-a1bd-c2cc3874924a"},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nDZe_wqKU6J3","outputId":"031e0ee2-9385-44c0-ab12-97cb3c95ffc9","executionInfo":{"status":"ok","timestamp":1702304409865,"user_tz":-540,"elapsed":14624,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n","Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.7.0)\n","Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.25.0)\n","Requirement already satisfied: optimum in /usr/local/lib/python3.10/dist-packages (1.15.0)\n","Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (0.41.3.post1)\n","Requirement already satisfied: trl in /usr/local/lib/python3.10/dist-packages (0.7.4)\n","Requirement already satisfied: wandb in /usr/local/lib/python3.10/dist-packages (0.16.1)\n","Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (0.7.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.4)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n","Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.1.0+cu118)\n","Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from optimum) (15.0.1)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum) (1.12)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (from optimum) (2.15.0)\n","Requirement already satisfied: tyro>=0.5.11 in /usr/local/lib/python3.10/dist-packages (from trl) (0.6.0)\n","Requirement already satisfied: Click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb) (8.1.7)\n","Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.1.40)\n","Requirement already satisfied: sentry-sdk>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.38.0)\n","Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (0.4.0)\n","Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb) (1.3.3)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb) (67.7.2)\n","Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.4.4)\n","Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.20.3)\n","Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.16.0)\n","Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from GitPython!=3.1.29,>=1.0.0->wandb) (4.0.11)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.11.17)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.2)\n","Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.1.0)\n","Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.1.99)\n","Requirement already satisfied: docstring-parser>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (0.15)\n","Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (13.7.0)\n","Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (1.6.5)\n","Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->optimum) (10.0)\n","Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (9.0.0)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (0.6)\n","Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (0.3.7)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (1.5.3)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (3.4.1)\n","Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (0.70.15)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (3.9.1)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum) (1.3.0)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (23.1.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (6.0.4)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (1.9.3)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (1.4.0)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (1.3.1)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (4.0.3)\n","Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb) (5.0.1)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (2.16.1)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.3)\n","Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum) (2023.3.post1)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl) (0.1.2)\n"]}],"source":["pip install transformers peft accelerate optimum bitsandbytes trl wandb einops"],"id":"nDZe_wqKU6J3"},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"51eb00d7-2928-41ad-9ae9-7f0da7d64d6d","outputId":"e7e31196-fa10-4589-e5e8-c4086486db5f","executionInfo":{"status":"ok","timestamp":1702304447771,"user_tz":-540,"elapsed":30386,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n"," warnings.warn(\n"]}],"source":["import os\n","from dataclasses import dataclass, field\n","from typing import Optional\n","import re\n","\n","import torch\n","import tyro\n","from accelerate import Accelerator\n","from datasets import load_dataset, Dataset\n","from peft import AutoPeftModelForCausalLM, LoraConfig\n","from tqdm import tqdm\n","from transformers import (\n"," AutoModelForCausalLM,\n"," AutoTokenizer,\n"," BitsAndBytesConfig,\n"," TrainingArguments,\n",")\n","\n","from trl import SFTTrainer\n","\n","from trl.trainer import ConstantLengthDataset"],"id":"51eb00d7-2928-41ad-9ae9-7f0da7d64d6d"},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":162,"referenced_widgets":["dbe8b80107f646fca9ce17fc6898688e","25bab324b2b9446bad5f3a73eed40e68","1e5df26c96974f9e80ec411cc2efb005","726bbc9eda2647089f64254e9afc18a6","730a80d2060d4c0d9ddd2e17f2da0045","cd2ea8d1f93c436c8045979227f28f39","e520cbc12c7f45809976dfbfcf56dd64","cacc47dd52114b3caa6a0a420f748793","435d3880497f437fbe82c5c5aea4723b","f2c6a7c598a2446d980e5b099f8b0504","380d699b391e443594c77e0618acc1e6","81c738cb1572429fad029c865af5864e","1dbd9abdfd9f441a9a2a92797469029f","bdff58ba27c74f89acc6ce2fa028b322","a8d2283aa6d44f1ab1549f4311e88e2d","ff6ee54fece6482fa4908c5bd6f35331","4552475fe488474e98941eb5bc34fe1e","349de155fbbb411b98558636e5b363e5","29721702addc4325b2d6578e51ad6212","ff3d0f971a534f23928c1c9b133ade05","38d4d232d70d49dd8c3ab620e6cfb96c","7dcd8bfea49a447390fd3d693ce473f8","a827efea829546b7b7e5e42a465849e4","fee5d6bf794f4cb7962ef9985fbf4348","bb9ba62e3cd74e5d965fd6d7cbfffcdb","6d01340c7ea248da9b089906ddb0743f","520fd7520fe4457f88e1e7bdcbff3e99","66775e202d174977937a2bb33552e08d","ab2576b47a964778a4fb23a0177c2372","a99d5e99af0748a289fa755b80c2ceaf","129d75c4582a42b98245c5a79ea22525","92fdf3c90389449595e1d7b3605f6953"]},"id":"tX7gYxZaVhYL","outputId":"368e5df8-8976-47c1-a8be-d407e4e16a4d","executionInfo":{"status":"ok","timestamp":1702304450076,"user_tz":-540,"elapsed":364,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svโฆ"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"dbe8b80107f646fca9ce17fc6898688e"}},"metadata":{}}],"source":["from huggingface_hub import notebook_login\n","\n","notebook_login()"],"id":"tX7gYxZaVhYL"},{"cell_type":"markdown","metadata":{"id":"FuKA5uZihmdh"},"source":["๋๋ผ์ด๋ธ ๋ง์ดํธ ํ ํ์ผ ์
๋ก๋\n","- food-order-understanding-small-3200.json\n","- food-order-understanding-small-800.json"],"id":"FuKA5uZihmdh"},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4DF9D2SXVpHP","outputId":"b457ae66-95f7-4250-a687-2125ad618ccc","executionInfo":{"status":"ok","timestamp":1702304468578,"user_tz":-540,"elapsed":3443,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount(\"/gdrive\", force_remount=True).\n"]}],"source":["from google.colab import drive\n","drive.mount('/gdrive')"],"id":"4DF9D2SXVpHP"},{"cell_type":"code","execution_count":null,"metadata":{"id":"VFgITUI8WjKe"},"outputs":[],"source":["# ๋ด๊ฐ ์
๋ก๋ํ ๊ฒฝ๋ก ๊ธฐ์ต ํด๋ \n","\n","# /gdrive/MyDrive/food-order-understanding-small-3200.json\n","# /gdrive/MyDrive/food-order-understanding-small-800.json"],"id":"VFgITUI8WjKe"},{"cell_type":"markdown","metadata":{"id":"036eece3-5f89-4fec-b0cd-268478b5e83d"},"source":["# ๋งค๊ฐ ๋ณ์ ์ค์ "],"id":"036eece3-5f89-4fec-b0cd-268478b5e83d"},{"cell_type":"code","execution_count":6,"metadata":{"id":"e03d01b5-eaeb-4626-9dc5-47c3691e7fcf","executionInfo":{"status":"ok","timestamp":1702304472596,"user_tz":-540,"elapsed":362,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["@dataclass\n","class ScriptArguments:\n"," cache_dir: Optional[str] = field(\n"," default=None, metadata={\"help\": \"the cache dir\"}\n"," )\n"," model_name: Optional[str] = field(\n"," default=\"meta-llama/Llama-2-7b-chat-hf\", metadata={\"help\": \"the model name\"}\n"," )\n","\n"," dataset_name: Optional[str] = field(\n"," default=None,\n"," metadata={\"help\": \"the dataset name\"},\n"," )\n"," seq_length: Optional[int] = field(\n"," default=1024, metadata={\"help\": \"the sequence length\"}\n"," )\n"," num_workers: Optional[int] = field(\n"," default=8, metadata={\"help\": \"the number of workers\"}\n"," )\n"," training_args: TrainingArguments = field(\n"," default_factory=lambda: TrainingArguments(\n"," output_dir=\"./results\",\n"," # max_steps=500,\n"," logging_steps=20,\n"," # save_steps=10,\n"," per_device_train_batch_size=1,\n"," per_device_eval_batch_size=1,\n"," gradient_accumulation_steps=2,\n"," gradient_checkpointing=False,\n"," group_by_length=False,\n"," learning_rate=1e-4,\n"," lr_scheduler_type=\"cosine\",\n"," # warmup_steps=100,\n"," warmup_ratio=0.03,\n"," max_grad_norm=0.3,\n"," weight_decay=0.05,\n"," save_total_limit=20,\n"," save_strategy=\"epoch\",\n"," num_train_epochs=1,\n"," optim=\"paged_adamw_32bit\",\n"," fp16=True,\n"," remove_unused_columns=False,\n"," report_to=\"wandb\",\n"," )\n"," )\n","\n"," packing: Optional[bool] = field(\n"," default=True, metadata={\"help\": \"whether to use packing for SFTTrainer\"}\n"," )\n","\n"," peft_config: LoraConfig = field(\n"," default_factory=lambda: LoraConfig(\n"," r=8,\n"," lora_alpha=16,\n"," lora_dropout=0.05,\n"," target_modules=[\"c_attn\", \"c_proj\", \"c_fc\"],\n"," bias=\"none\",\n"," task_type=\"CAUSAL_LM\",\n"," )\n"," )\n","\n"," merge_with_final_checkpoint: Optional[bool] = field(\n"," default=False, metadata={\"help\": \"Do only merge with final checkpoint\"}\n"," )"],"id":"e03d01b5-eaeb-4626-9dc5-47c3691e7fcf"},{"cell_type":"markdown","metadata":{"id":"b0b34850-006c-4c87-a7d5-27c6c871e7de"},"source":["# ์ ํธ๋ฆฌํฐ"],"id":"b0b34850-006c-4c87-a7d5-27c6c871e7de"},{"cell_type":"code","execution_count":7,"metadata":{"id":"8224d213-8766-4b40-899e-1a1b8d164365","executionInfo":{"status":"ok","timestamp":1702304487747,"user_tz":-540,"elapsed":300,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["def chars_token_ratio(dataset, tokenizer, nb_examples=400):\n"," \"\"\"\n"," Estimate the average number of characters per token in the dataset.\n"," \"\"\"\n"," total_characters, total_tokens = 0, 0\n"," for _, example in tqdm(zip(range(nb_examples), iter(dataset)), total=nb_examples):\n"," text = prepare_sample_text(example)\n"," total_characters += len(text)\n"," if tokenizer.is_fast:\n"," total_tokens += len(tokenizer(text).tokens())\n"," else:\n"," total_tokens += len(tokenizer.tokenize(text))\n","\n"," return total_characters / total_tokens\n","\n","\n","def print_trainable_parameters(model):\n"," \"\"\"\n"," Prints the number of trainable parameters in the model.\n"," \"\"\"\n"," trainable_params = 0\n"," all_param = 0\n"," for _, param in model.named_parameters():\n"," all_param += param.numel()\n"," if param.requires_grad:\n"," trainable_params += param.numel()\n"," print(\n"," f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n"," )"],"id":"8224d213-8766-4b40-899e-1a1b8d164365"},{"cell_type":"markdown","metadata":{"id":"58085944-b951-4c9b-bdeb-0ccc60c2a3b3"},"source":["# ๋ฐ์ดํฐ ๋ก๋ฉ"],"id":"58085944-b951-4c9b-bdeb-0ccc60c2a3b3"},{"cell_type":"code","execution_count":8,"metadata":{"id":"b49c3470-480e-4ff2-b2c5-fcf1d3a13fba","executionInfo":{"status":"ok","timestamp":1702304494303,"user_tz":-540,"elapsed":332,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["def prepare_sample_text(example):\n"," \"\"\"Prepare the text from a sample of the dataset.\"\"\"\n","\n"," prompt_template = \"\"\"###System;{System}\n"," ###User;{User}\n"," ###Midm;{Midm}\"\"\"\n","\n"," default_system_msg = (\n"," \"๋๋ ๋จผ์ ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ์ฃผ๋ฌธ ๋ฌธ์ฅ์ ๋ถ์ํ๋ ์์ด์ ํธ์ด๋ค. ์ด๋ก๋ถํฐ ์ฃผ๋ฌธ์ ๊ตฌ์ฑํ๋ ์์๋ช
, ์ต์
๋ช
, ์๋์ ์ฐจ๋ก๋๋ก ์ถ์ถํด์ผ ํ๋ค.\"\n"," )\n","\n"," text = (\n"," prompt_template.format(System=default_system_msg, User=example[\"input\"],Midm=example[\"output\"])\n"," )\n","\n"," return text"],"id":"b49c3470-480e-4ff2-b2c5-fcf1d3a13fba"},{"cell_type":"code","execution_count":9,"metadata":{"id":"5d9abfb4-339d-414b-855b-ced51631b752","executionInfo":{"status":"ok","timestamp":1702304501267,"user_tz":-540,"elapsed":326,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["def create_datasets(tokenizer, args):\n"," train_data = Dataset.from_json(args.dataset_name)\n","\n"," chars_per_token = chars_token_ratio(train_data, tokenizer)\n"," print(f\"The character to token ratio of the dataset is: {chars_per_token:.2f}\")\n","\n"," train_dataset = ConstantLengthDataset(\n"," tokenizer,\n"," train_data,\n"," formatting_func=prepare_sample_text,\n"," infinite=True,\n"," seq_length=args.seq_length,\n"," chars_per_token=chars_per_token,\n"," )\n"," return train_dataset"],"id":"5d9abfb4-339d-414b-855b-ced51631b752"},{"cell_type":"markdown","metadata":{"id":"2e7ef79d-a354-4c80-9435-c130ffed9e32"},"source":["# ๋ฏธ์ธ ํ๋์ฉ ๋ชจ๋ธ ๋ก๋ฉ"],"id":"2e7ef79d-a354-4c80-9435-c130ffed9e32"},{"cell_type":"code","execution_count":10,"metadata":{"id":"bf4c65e0-5ffa-4a20-9e78-6c1f030572ff","executionInfo":{"status":"ok","timestamp":1702304507967,"user_tz":-540,"elapsed":282,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["script_args = ScriptArguments(\n"," num_workers=2,\n"," seq_length=384,\n"," dataset_name='/gdrive/MyDrive/food-order-understanding-small-3200.json',\n"," model_name='KT-AI/midm-bitext-S-7B-inst-v1',\n"," # model_name='jangmin/midm-7b-safetensors-only',\n"," )"],"id":"bf4c65e0-5ffa-4a20-9e78-6c1f030572ff"},{"cell_type":"code","execution_count":11,"metadata":{"id":"372c64be-8fc0-4cc8-bda8-92ecf3632cc3","executionInfo":{"status":"ok","timestamp":1702304513314,"user_tz":-540,"elapsed":310,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["script_args.training_args.logging_steps = 50\n","script_args.training_args.max_steps = 300\n","script_args.training_args.output_dir = '/gdrive/MyDrive/lora-midm-7b-food-order-understanding'\n","script_args.training_args.run_name = 'midm-7b-food-order-understanding'"],"id":"372c64be-8fc0-4cc8-bda8-92ecf3632cc3"},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bac62c01-21ef-491e-a686-cf4988186c58","outputId":"1c61c322-090d-4dc1-fe28-173a91fd2a18","executionInfo":{"status":"ok","timestamp":1702304515007,"user_tz":-540,"elapsed":350,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["ScriptArguments(cache_dir=None, model_name='KT-AI/midm-bitext-S-7B-inst-v1', dataset_name='/gdrive/MyDrive/food-order-understanding-small-3200.json', seq_length=384, num_workers=2, training_args=TrainingArguments(\n","_n_gpu=1,\n","adafactor=False,\n","adam_beta1=0.9,\n","adam_beta2=0.999,\n","adam_epsilon=1e-08,\n","auto_find_batch_size=False,\n","bf16=False,\n","bf16_full_eval=False,\n","data_seed=None,\n","dataloader_drop_last=False,\n","dataloader_num_workers=0,\n","dataloader_pin_memory=True,\n","ddp_backend=None,\n","ddp_broadcast_buffers=None,\n","ddp_bucket_cap_mb=None,\n","ddp_find_unused_parameters=None,\n","ddp_timeout=1800,\n","debug=[],\n","deepspeed=None,\n","disable_tqdm=False,\n","dispatch_batches=None,\n","do_eval=False,\n","do_predict=False,\n","do_train=False,\n","eval_accumulation_steps=None,\n","eval_delay=0,\n","eval_steps=None,\n","evaluation_strategy=no,\n","fp16=True,\n","fp16_backend=auto,\n","fp16_full_eval=False,\n","fp16_opt_level=O1,\n","fsdp=[],\n","fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},\n","fsdp_min_num_params=0,\n","fsdp_transformer_layer_cls_to_wrap=None,\n","full_determinism=False,\n","gradient_accumulation_steps=2,\n","gradient_checkpointing=False,\n","gradient_checkpointing_kwargs=None,\n","greater_is_better=None,\n","group_by_length=False,\n","half_precision_backend=auto,\n","hub_always_push=False,\n","hub_model_id=None,\n","hub_private_repo=False,\n","hub_strategy=every_save,\n","hub_token=<HUB_TOKEN>,\n","ignore_data_skip=False,\n","include_inputs_for_metrics=False,\n","include_tokens_per_second=False,\n","jit_mode_eval=False,\n","label_names=None,\n","label_smoothing_factor=0.0,\n","learning_rate=0.0001,\n","length_column_name=length,\n","load_best_model_at_end=False,\n","local_rank=0,\n","log_level=passive,\n","log_level_replica=warning,\n","log_on_each_node=True,\n","logging_dir=./results/runs/Dec11_14-21-47_8ccc3e745a6c,\n","logging_first_step=False,\n","logging_nan_inf_filter=True,\n","logging_steps=50,\n","logging_strategy=steps,\n","lr_scheduler_type=cosine,\n","max_grad_norm=0.3,\n","max_steps=300,\n","metric_for_best_model=None,\n","mp_parameters=,\n","neftune_noise_alpha=None,\n","no_cuda=False,\n","num_train_epochs=1,\n","optim=paged_adamw_32bit,\n","optim_args=None,\n","output_dir=/gdrive/MyDrive/lora-midm-7b-food-order-understanding,\n","overwrite_output_dir=False,\n","past_index=-1,\n","per_device_eval_batch_size=1,\n","per_device_train_batch_size=1,\n","prediction_loss_only=False,\n","push_to_hub=False,\n","push_to_hub_model_id=None,\n","push_to_hub_organization=None,\n","push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n","ray_scope=last,\n","remove_unused_columns=False,\n","report_to=['wandb'],\n","resume_from_checkpoint=None,\n","run_name=midm-7b-food-order-understanding,\n","save_on_each_node=False,\n","save_safetensors=True,\n","save_steps=500,\n","save_strategy=epoch,\n","save_total_limit=20,\n","seed=42,\n","skip_memory_metrics=True,\n","split_batches=False,\n","tf32=None,\n","torch_compile=False,\n","torch_compile_backend=None,\n","torch_compile_mode=None,\n","torchdynamo=None,\n","tpu_metrics_debug=False,\n","tpu_num_cores=None,\n","use_cpu=False,\n","use_ipex=False,\n","use_legacy_prediction_loop=False,\n","use_mps_device=False,\n","warmup_ratio=0.03,\n","warmup_steps=0,\n","weight_decay=0.05,\n","), packing=True, peft_config=LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules={'c_fc', 'c_attn', 'c_proj'}, lora_alpha=16, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}), merge_with_final_checkpoint=False)\n"]}],"source":["print(script_args)"],"id":"bac62c01-21ef-491e-a686-cf4988186c58"},{"cell_type":"code","execution_count":13,"metadata":{"id":"1ff1422e-184d-4438-b033-40ae8bdaa5fd","executionInfo":{"status":"ok","timestamp":1702304525838,"user_tz":-540,"elapsed":324,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["bnb_config = BitsAndBytesConfig(\n"," load_in_4bit=True,\n"," bnb_4bit_quant_type=\"nf4\",\n"," bnb_4bit_compute_dtype=torch.bfloat16,\n",")"],"id":"1ff1422e-184d-4438-b033-40ae8bdaa5fd"},{"cell_type":"markdown","metadata":{"id":"elg7gcB-5zb7"},"source":["์๋ณธ์ธ 'KT-AI/midm-bitext-S-7B-inst-v1' ๋ *.bin ํํ๋ก ๋ชจ๋ธ์ ์ ๊ณตํ๋ค.\n","- ์ฝ๋ฉ์์ CPU ๋ฉ๋ชจ๋ฆฌ ๋ถ์กฑ ๋ฐ์\n","\n","ํด๊ฒฐ์ฑ
\n","- safetensors๋ก ๋ณํํ ๋ชจ๋ธ์ ์
๋ก๋ ํ๊ณ ์ด๋ฅผ ์ฌ์ฉํ๊ธฐ๋ก ํ๋ค."],"id":"elg7gcB-5zb7"},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":168,"referenced_widgets":["b2e8914a604a4cd7a8160a247b46897e","80b1d408c82c4a16b237c9ca6ff853a9","9ae5d008fbdb49e793eeca063f8a9b79","e0f4a69b292d4821b24b1e0f8c85d994","ac74b0890fdb4386a50184258f6efea6","329a5858a60f4140b693ad2d40f2666c","ed062bc006874d5a975c048bf1b49111","0405581206a04b8f9e462b4a97a9b396","d39c0747d6da4f9095fb300b7ecdee14","35a047ec6fdd44df851354380808b081","51396a17ef894a3dbddbcc21f59e6fe9"]},"id":"15c8425e-bb0b-40c5-bfe8-385bac699b9d","outputId":"0e0eac23-cbe8-4fa6-9a80-c3710b860b4a","executionInfo":{"status":"ok","timestamp":1702304636354,"user_tz":-540,"elapsed":99278,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n"," warnings.warn(\n"]},{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b2e8914a604a4cd7a8160a247b46897e"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:374: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n"," warnings.warn(\n"]}],"source":["base_model = AutoModelForCausalLM.from_pretrained(\n"," script_args.model_name,\n"," quantization_config=bnb_config,\n"," device_map=\"auto\", # {\"\": Accelerator().local_process_index},\n"," trust_remote_code=True,\n"," use_auth_token=True,\n"," cache_dir=script_args.cache_dir,\n",")\n","base_model.config.use_cache = False"],"id":"15c8425e-bb0b-40c5-bfe8-385bac699b9d"},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E9D239NqbDba","outputId":"3924764c-e5b5-4d6e-c890-336a23859588","executionInfo":{"status":"ok","timestamp":1702304641816,"user_tz":-540,"elapsed":570,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["MidmLMHeadModel(\n"," (transformer): MidmModel(\n"," (wte): Embedding(72192, 4096)\n"," (rotary_pos_emb): RotaryEmbedding()\n"," (drop): Dropout(p=0.0, inplace=False)\n"," (h): ModuleList(\n"," (0-31): 32 x MidmBlock(\n"," (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," (attn): MidmAttention(\n"," (c_attn): Linear4bit(in_features=4096, out_features=12288, bias=False)\n"," (c_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n"," (attn_dropout): Dropout(p=0.0, inplace=False)\n"," (resid_dropout): Dropout(p=0.0, inplace=False)\n"," )\n"," (ln_2): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," (mlp): MidmMLP(\n"," (c_fc): Linear4bit(in_features=4096, out_features=21760, bias=False)\n"," (c_proj): Linear4bit(in_features=10880, out_features=4096, bias=False)\n"," (dropout): Dropout(p=0.0, inplace=False)\n"," )\n"," )\n"," )\n"," (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," )\n"," (lm_head): Linear(in_features=4096, out_features=72192, bias=False)\n",")"]},"metadata":{},"execution_count":15}],"source":["base_model"],"id":"E9D239NqbDba"},{"cell_type":"code","execution_count":16,"metadata":{"id":"d37b485f-4fd3-404f-ab02-2bf3e93b3fc2","executionInfo":{"status":"ok","timestamp":1702304645987,"user_tz":-540,"elapsed":397,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["peft_config = script_args.peft_config"],"id":"d37b485f-4fd3-404f-ab02-2bf3e93b3fc2"},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4420fcc4-2bac-413d-b7aa-89455c512419","outputId":"e255ac39-e724-4e89-fd37-120292b4ad3e","executionInfo":{"status":"ok","timestamp":1702304647839,"user_tz":-540,"elapsed":338,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules={'c_fc', 'c_attn', 'c_proj'}, lora_alpha=16, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={})"]},"metadata":{},"execution_count":17}],"source":["peft_config"],"id":"4420fcc4-2bac-413d-b7aa-89455c512419"},{"cell_type":"code","execution_count":18,"metadata":{"id":"f47f9584-3988-46b8-a062-29dcde75a0e2","executionInfo":{"status":"ok","timestamp":1702304651037,"user_tz":-540,"elapsed":915,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["tokenizer = AutoTokenizer.from_pretrained(\n"," script_args.model_name,\n"," trust_remote_code=True,\n"," cache_dir=script_args.cache_dir,\n",")\n","\n","if getattr(tokenizer, \"pad_token\", None) is None:\n"," tokenizer.pad_token = tokenizer.eos_token\n","tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n","\n","tokenizer.add_special_tokens(dict(bos_token='<s>'))\n","\n","base_model.config.pad_token_id = tokenizer.pad_token_id\n","base_model.config.bos_token_id = tokenizer.bos_token_id"],"id":"f47f9584-3988-46b8-a062-29dcde75a0e2"},{"cell_type":"code","execution_count":19,"metadata":{"id":"abd17c83-ab8d-44cb-b69b-fc0936c2cec5","executionInfo":{"status":"ok","timestamp":1702304654395,"user_tz":-540,"elapsed":339,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["training_args = script_args.training_args"],"id":"abd17c83-ab8d-44cb-b69b-fc0936c2cec5"},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"62e8139f-5179-4c75-84a7-0c818ab0a35a","outputId":"b0893f9b-1aad-499d-9857-e8b7faac026f","executionInfo":{"status":"ok","timestamp":1702304656295,"user_tz":-540,"elapsed":3,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stderr","text":["100%|โโโโโโโโโโ| 400/400 [00:00<00:00, 2881.84it/s]"]},{"output_type":"stream","name":"stdout","text":["The character to token ratio of the dataset is: 1.52\n"]},{"output_type":"stream","name":"stderr","text":["\n","/usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:548: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.\n"," warnings.warn(\n"]}],"source":["train_dataset = create_datasets(tokenizer, script_args)"],"id":"62e8139f-5179-4c75-84a7-0c818ab0a35a"},{"cell_type":"code","execution_count":21,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"000314e9-f10b-4685-8da6-0511494a9eb4","outputId":"30604c53-3c78-45a5-80d4-3d8a06468906","executionInfo":{"status":"ok","timestamp":1702304659649,"user_tz":-540,"elapsed":455,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["3200"]},"metadata":{},"execution_count":21}],"source":["len(train_dataset)"],"id":"000314e9-f10b-4685-8da6-0511494a9eb4"},{"cell_type":"code","execution_count":22,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4ba80a64-0ec7-4b29-ac95-7b3d34549f17","outputId":"82ff1e93-8546-4feb-e018-be6b94116331","executionInfo":{"status":"ok","timestamp":1702304662462,"user_tz":-540,"elapsed":879,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:267: UserWarning: You passed `packing=True` to the SFTTrainer, and you are training your model with `max_steps` strategy. The dataset will be iterated until the `max_steps` are reached.\n"," warnings.warn(\n"]}],"source":["trainer = SFTTrainer(\n"," model=base_model,\n"," train_dataset=train_dataset,\n"," eval_dataset=None,\n"," peft_config=peft_config,\n"," packing=script_args.packing,\n"," max_seq_length=script_args.seq_length,\n"," tokenizer=tokenizer,\n"," args=training_args,\n",")"],"id":"4ba80a64-0ec7-4b29-ac95-7b3d34549f17"},{"cell_type":"code","execution_count":23,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6qPxfovivMuH","outputId":"eb88ae80-cc09-46de-9eaf-2f1c194e65b6","executionInfo":{"status":"ok","timestamp":1702304665032,"user_tz":-540,"elapsed":323,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["MidmLMHeadModel(\n"," (transformer): MidmModel(\n"," (wte): Embedding(72192, 4096)\n"," (rotary_pos_emb): RotaryEmbedding()\n"," (drop): Dropout(p=0.0, inplace=False)\n"," (h): ModuleList(\n"," (0-31): 32 x MidmBlock(\n"," (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," (attn): MidmAttention(\n"," (c_attn): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=4096, out_features=12288, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=4096, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=12288, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (c_proj): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=4096, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=4096, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (attn_dropout): Dropout(p=0.0, inplace=False)\n"," (resid_dropout): Dropout(p=0.0, inplace=False)\n"," )\n"," (ln_2): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," (mlp): MidmMLP(\n"," (c_fc): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=4096, out_features=21760, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=4096, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=21760, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (c_proj): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=10880, out_features=4096, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=10880, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=4096, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (dropout): Dropout(p=0.0, inplace=False)\n"," )\n"," )\n"," )\n"," (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," )\n"," (lm_head): Linear(in_features=4096, out_features=72192, bias=False)\n",")"]},"metadata":{},"execution_count":23}],"source":["base_model"],"id":"6qPxfovivMuH"},{"cell_type":"code","execution_count":24,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gw9xbeUgbZEo","outputId":"02436cba-c8bb-48e7-cf63-4bef5313ff80","executionInfo":{"status":"ok","timestamp":1702304668322,"user_tz":-540,"elapsed":363,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["PeftModelForCausalLM(\n"," (base_model): LoraModel(\n"," (model): MidmLMHeadModel(\n"," (transformer): MidmModel(\n"," (wte): Embedding(72192, 4096)\n"," (rotary_pos_emb): RotaryEmbedding()\n"," (drop): Dropout(p=0.0, inplace=False)\n"," (h): ModuleList(\n"," (0-31): 32 x MidmBlock(\n"," (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," (attn): MidmAttention(\n"," (c_attn): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=4096, out_features=12288, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=4096, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=12288, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (c_proj): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=4096, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=4096, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (attn_dropout): Dropout(p=0.0, inplace=False)\n"," (resid_dropout): Dropout(p=0.0, inplace=False)\n"," )\n"," (ln_2): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," (mlp): MidmMLP(\n"," (c_fc): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=4096, out_features=21760, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=4096, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=21760, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (c_proj): lora.Linear4bit(\n"," (base_layer): Linear4bit(in_features=10880, out_features=4096, bias=False)\n"," (lora_dropout): ModuleDict(\n"," (default): Dropout(p=0.05, inplace=False)\n"," )\n"," (lora_A): ModuleDict(\n"," (default): Linear(in_features=10880, out_features=8, bias=False)\n"," )\n"," (lora_B): ModuleDict(\n"," (default): Linear(in_features=8, out_features=4096, bias=False)\n"," )\n"," (lora_embedding_A): ParameterDict()\n"," (lora_embedding_B): ParameterDict()\n"," )\n"," (dropout): Dropout(p=0.0, inplace=False)\n"," )\n"," )\n"," )\n"," (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n"," )\n"," (lm_head): Linear(in_features=4096, out_features=72192, bias=False)\n"," )\n"," )\n",")"]},"metadata":{},"execution_count":24}],"source":["trainer.model"],"id":"gw9xbeUgbZEo"},{"cell_type":"code","execution_count":25,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"edb204be-ec15-4800-af49-6cfbad2f7f9a","outputId":"a49ca1de-0d59-48d8-cbc3-ba91e3ba1904","executionInfo":{"status":"ok","timestamp":1702304672140,"user_tz":-540,"elapsed":304,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["trainable params: 16744448 || all params: 3821510656 || trainable%: 0.4381630592527648\n"]}],"source":["print_trainable_parameters(base_model)"],"id":"edb204be-ec15-4800-af49-6cfbad2f7f9a"},{"cell_type":"code","execution_count":26,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"sVA-VzeTigHs","outputId":"cb6cfa28-e9ad-4cc9-b9c5-7bfd0d8cadd2","executionInfo":{"status":"ok","timestamp":1702304673972,"user_tz":-540,"elapsed":292,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["7795015808"]},"metadata":{},"execution_count":26}],"source":["base_model.get_memory_footprint()"],"id":"sVA-VzeTigHs"},{"cell_type":"code","source":["trainer.model.print_trainable_parameters()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CTZmx_faQ-Xj","outputId":"ef6b9e25-ce54-49ae-fe9e-05b18de03fc4","executionInfo":{"status":"ok","timestamp":1702304675538,"user_tz":-540,"elapsed":2,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"id":"CTZmx_faQ-Xj","execution_count":27,"outputs":[{"output_type":"stream","name":"stdout","text":["trainable params: 16,744,448 || all params: 7,034,347,520 || trainable%: 0.23803839591934178\n"]}]},{"cell_type":"markdown","metadata":{"id":"76sRe172fGlm"},"source":["midm ๋ชจ๋ธ์ ์ฃผ๋ฌธ ๋ฌธ์ฅ ์ดํด์ ์ ์ฉ์ ํน์ง\n","- ๋ชจ๋ธ ๋ก๋ฉ ๊ณผ์ ์์ CPU๋ 5.1๊ธฐ๊ฐ, ๋์คํฌ 42.4๏ฟฝ๏ฟฝ๊ฐ, GPU ๋ฉ๋ชจ๋ฆฌ: 7,4 ๊ธฐ๊ฐ\n","\n","๊ตฌ๊ธ ์ฝ๋ฉ T-4 GPU: 300์คํ
(13:47์ด ์์)\n","\n","์ํ์ค ๊ธธ์ด 384์ ๊ฒฝ์ฐ\n","- 14.7 G / 15.0 G ์ฌ์ฉ\n","- ๋ฉ๋ชจ๋ฆฌ ์ค๋ฒํ๋ก์ฐ ๋ฐ์์ ์ด๋ณด๋ค ์ค์ผ ๊ฒ"],"id":"76sRe172fGlm"},{"cell_type":"code","execution_count":28,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":443},"id":"14019fa9-0c6f-4729-ac99-0d407af375b8","outputId":"7f0f51ba-6b7c-4aef-992d-c00a21dd7ed6","executionInfo":{"status":"ok","timestamp":1702305619411,"user_tz":-540,"elapsed":940657,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stderr","text":["\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33msuyeun0109\u001b[0m (\u001b[33msuyeun\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["Tracking run with wandb version 0.16.1"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["Run data is saved locally in <code>/content/wandb/run-20231211_142441-q0brniqd</code>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["Syncing run <strong><a href='https://wandb.ai/suyeun/huggingface/runs/q0brniqd' target=\"_blank\">midm-7b-food-order-understanding</a></strong> to <a href='https://wandb.ai/suyeun/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[" View project at <a href='https://wandb.ai/suyeun/huggingface' target=\"_blank\">https://wandb.ai/suyeun/huggingface</a>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[" View run at <a href='https://wandb.ai/suyeun/huggingface/runs/q0brniqd' target=\"_blank\">https://wandb.ai/suyeun/huggingface/runs/q0brniqd</a>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n"," <div>\n"," \n"," <progress value='300' max='300' style='width:300px; height:20px; vertical-align: middle;'></progress>\n"," [300/300 15:27, Epoch 0/1]\n"," </div>\n"," <table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: left;\">\n"," <th>Step</th>\n"," <th>Training Loss</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <td>50</td>\n"," <td>1.040400</td>\n"," </tr>\n"," <tr>\n"," <td>100</td>\n"," <td>0.548100</td>\n"," </tr>\n"," <tr>\n"," <td>150</td>\n"," <td>0.504600</td>\n"," </tr>\n"," <tr>\n"," <td>200</td>\n"," <td>0.495700</td>\n"," </tr>\n"," <tr>\n"," <td>250</td>\n"," <td>0.518000</td>\n"," </tr>\n"," <tr>\n"," <td>300</td>\n"," <td>0.497100</td>\n"," </tr>\n"," </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=300, training_loss=0.6006682777404785, metrics={'train_runtime': 940.0842, 'train_samples_per_second': 0.638, 'train_steps_per_second': 0.319, 'total_flos': 9315508499251200.0, 'train_loss': 0.6006682777404785, 'epoch': 0.19})"]},"metadata":{},"execution_count":28}],"source":["trainer.train()"],"id":"14019fa9-0c6f-4729-ac99-0d407af375b8"},{"cell_type":"code","execution_count":29,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"3Y4FQSyRghQt","outputId":"60b008f1-1e1c-42f3-bd0c-1157fa7412b7","executionInfo":{"status":"ok","timestamp":1702305626226,"user_tz":-540,"elapsed":412,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["'/gdrive/MyDrive/lora-midm-7b-food-order-understanding'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":29}],"source":["script_args.training_args.output_dir"],"id":"3Y4FQSyRghQt"},{"cell_type":"code","execution_count":30,"metadata":{"id":"49f05450-da2a-4edd-9db2-63836a0ec73a","executionInfo":{"status":"ok","timestamp":1702305629228,"user_tz":-540,"elapsed":851,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["trainer.save_model(script_args.training_args.output_dir)"],"id":"49f05450-da2a-4edd-9db2-63836a0ec73a"},{"cell_type":"markdown","metadata":{"id":"652f307e-e1d7-43ae-b083-dba2d94c2296"},"source":["# ์ถ๋ก ํ
์คํธ"],"id":"652f307e-e1d7-43ae-b083-dba2d94c2296"},{"cell_type":"code","execution_count":31,"metadata":{"id":"ea8a1fea-7499-4386-9dea-0509110f61af","executionInfo":{"status":"ok","timestamp":1702305631310,"user_tz":-540,"elapsed":857,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["from transformers import pipeline, TextStreamer"],"id":"ea8a1fea-7499-4386-9dea-0509110f61af"},{"cell_type":"code","execution_count":32,"metadata":{"id":"52626888-1f6e-46b6-a8dd-836622149ff5","executionInfo":{"status":"ok","timestamp":1702305633700,"user_tz":-540,"elapsed":481,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["instruction_prompt_template = \"\"\"###System;๋ค์์ ๋งค์ฅ์์ ๊ณ ๊ฐ์ด ์์์ ์ฃผ๋ฌธํ๋ ์ฃผ๋ฌธ ๋ฌธ์ฅ์ด๋ค. ์ด๋ฅผ ๋ถ์ํ์ฌ ์์๋ช
, ์ต์
๋ช
, ์๋์ ์ถ์ถํ์ฌ ๊ณ ๊ฐ์ ์๋๋ฅผ ์ดํดํ๊ณ ์ ํ๋ค.\n","๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ์์ฑํด์ฃผ๊ธฐ ๋ฐ๋๋ค.\n","\n","### ์ฃผ๋ฌธ ๋ฌธ์ฅ: {0} ### ๋ถ์ ๊ฒฐ๊ณผ:\n","\"\"\"\n","\n","prompt_template = \"\"\"###System;{System}\n","###User;{User}\n","###Midm;\"\"\"\n","\n","default_system_msg = (\n"," \"๋๋ ๋จผ์ ์ฌ์ฉ์๊ฐ ์
๋ ฅํ ์ฃผ๋ฌธ ๋ฌธ์ฅ์ ๋ถ์ํ๋ ์์ด์ ํธ์ด๋ค. ์ด๋ก๋ถํฐ ์ฃผ๋ฌธ์ ๊ตฌ์ฑํ๋ ์์๋ช
, ์ต์
๋ช
, ์๋์ ์ฐจ๋ก๋๋ก ์ถ์ถํด์ผ ํ๋ค.\"\n",")"],"id":"52626888-1f6e-46b6-a8dd-836622149ff5"},{"cell_type":"code","execution_count":33,"metadata":{"id":"46e844fa-8f63-4359-a4fb-df66e8171796","executionInfo":{"status":"ok","timestamp":1702305636576,"user_tz":-540,"elapsed":1,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["evaluation_queries = [\n"," \"์ค๋์ ๋น๊ฐ์ค๋๊น ์ด๊ฑฐ ๋จน์. ์ผ์ ์งฌ๋ฝ ๊ณฑ๋ฐฐ๊ธฐ ํ๋ํ๊ตฌ์, ์ฌ์ฒ ํ์์ก ์ค์ง ํ๊ทธ๋ฆ ์ฃผ์ธ์.\",\n"," \"์์ด์ค์๋ฉ๋ฆฌ์นด๋
ธ ํจ์ฌ์ด์ฆ ํ์ ํ๊ณ ์. ๋ธ๊ธฐ์ค๋ฌด๋ ํ์ ์ฃผ์ธ์. ๋, ์ฝ๋๋ธ๋ฃจ๋ผ๋ผ ํ๋์.\",\n"," \"์ฐธ์ด์ฌ ํ๋ณ, ์ฝ์นด์ฝ๋ผ 1.5๋ฆฌํฐ ํ๋ณ, ํ
์ฌ๋ผ ํ๋ณ์ด์.\",\n"," \"๊ผฌ๋ง๋ฌด์นจ 1์ธ๋ถํ๊ณ ์, ๋ญ๋๋ฆฌํ ์ค์ ์ฃผ์ธ์. ๊ทธ๋ฆฌ๊ณ ์์ฃผ๋ ํ๋ณ ์ฃผ์ธ์.\",\n"," \"๊น์น์ฐ๊ฐ 3์ธ๋ถํ๊ณ ์, ๊ณ๋๋ง์ด ์ฃผ์ธ์.\",\n"," \"๋ถ๊ณ ๊ธฐ๋ฒ๊ฑฐ์ธํธ 1๊ฐํ๊ณ ์ ๊ฐ์ํ๊น ์ถ๊ฐํด์ฃผ์ธ์.\",\n"," \"๋ถ๋ญ๋ณถ์๋ฉด 1๊ฐ๋ ์ฌ๋ฆฌ๊ณฐํ๋ฉด 2๊ฐ ์ฃผ์ธ์.\",\n"," \"์นดํ๋ผ๋ผ ์์ด์ค ์ท์ถ๊ฐ ํ์ํ๊ตฌ์. ์ค์ฝ ํ๋ ์ฃผ์ธ์\",\n"," \"์ฌ๊ธฐ์ ์ถ์ฒ๋ญ๊ฐ๋น 4์ธ๋ถํ๊ณ ์. ๋ผ๋ฉด์ฌ๋ฆฌ ์ถ๊ฐํ๊ฒ ์ต๋๋ค. ์ฝ๋ผ 300ml ๋์บ์ฃผ์ธ์.\",\n"," \"์์์์ ์กฐ๋ญ์ด๋ก๊ตญ 3์ธ๋ถํ๊ณ ์. ๋ก๋ง๋ ํ์ธํธ ์ฃผ์ธ์.\",\n"," \"๊นํํ์ 2์ธ๋ถ ํ๊ณ ์ ์ฝ๋ผ 1.5๋ฆฌํฐ ํ๋ณ์ด์.\",\n","]"],"id":"46e844fa-8f63-4359-a4fb-df66e8171796"},{"cell_type":"code","execution_count":34,"metadata":{"id":"1919cf1f-482e-4185-9d06-e3cea1918416","executionInfo":{"status":"ok","timestamp":1702305639801,"user_tz":-540,"elapsed":344,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["def wrapper_generate(model, input_prompt, do_stream=False):\n"," data = tokenizer(input_prompt, return_tensors=\"pt\")\n"," streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n"," input_ids = data.input_ids[..., :-1]\n"," with torch.no_grad():\n"," pred = model.generate(\n"," input_ids=input_ids.cuda(),\n"," streamer=streamer if do_stream else None,\n"," use_cache=True,\n"," max_new_tokens=float('inf'),\n"," do_sample=False\n"," )\n"," decoded_text = tokenizer.batch_decode(pred, skip_special_tokens=True)\n"," decoded_text = decoded_text[0].replace(\"<[!newline]>\", \"\\n\")\n"," return (decoded_text[len(input_prompt):])"],"id":"1919cf1f-482e-4185-9d06-e3cea1918416"},{"cell_type":"code","execution_count":35,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eaac1f6f-c823-4488-8edb-2f931ddf0daa","outputId":"c632e94d-faad-4244-b32d-139ace8783f8","executionInfo":{"status":"ok","timestamp":1702306195075,"user_tz":-540,"elapsed":552708,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n"," warnings.warn(\n"]}],"source":["eval_dic = {i:wrapper_generate(model=base_model, input_prompt=prompt_template.format(System=default_system_msg, User=evaluation_queries[i]))for i, query in enumerate(evaluation_queries)}"],"id":"eaac1f6f-c823-4488-8edb-2f931ddf0daa"},{"cell_type":"code","execution_count":36,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fefd04ba-2ed8-4f84-bdd0-86d52b3f39f6","outputId":"0d52da0b-d64c-4d60-a624-81d094fbbb13","executionInfo":{"status":"ok","timestamp":1702306195075,"user_tz":-540,"elapsed":18,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:์ผ์ ์งฌ๋ฝ, ์ต์
:๊ณฑ๋ฐฐ๊ธฐ, ์๋:ํ๋\n","- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:์ฌ์ฒ ํ์์ก, ์ต์
:์ค์ง, ์๋:ํ๊ทธ๋ฆ\n"]}],"source":["print(eval_dic[0])"],"id":"fefd04ba-2ed8-4f84-bdd0-86d52b3f39f6"},{"cell_type":"markdown","metadata":{"id":"3f471e3a-723b-4df5-aa72-46f571f6bab6"},"source":["# ๋ฏธ์ธํ๋๋ ๋ชจ๋ธ ๋ก๋ฉ ํ ํ
์คํธ"],"id":"3f471e3a-723b-4df5-aa72-46f571f6bab6"},{"cell_type":"code","execution_count":37,"metadata":{"id":"a43bdd07-7555-42b2-9888-a614afec892f","executionInfo":{"status":"ok","timestamp":1702306199550,"user_tz":-540,"elapsed":368,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[],"source":["bnb_config = BitsAndBytesConfig(\n"," load_in_4bit=True,\n"," bnb_4bit_quant_type=\"nf4\",\n"," bnb_4bit_compute_dtype=torch.bfloat16,\n",")"],"id":"a43bdd07-7555-42b2-9888-a614afec892f"},{"cell_type":"code","execution_count":39,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":705},"id":"39db2ee4-23c8-471f-89b2-bca34964bf81","outputId":"d00d2dc2-cd2f-480c-85a2-33cf265314b2","executionInfo":{"status":"error","timestamp":1702306279779,"user_tz":-540,"elapsed":15084,"user":{"displayName":"์กฐ์์ฐ","userId":"03810862007552836948"}}},"outputs":[{"output_type":"error","ename":"ValueError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)","\u001b[0;32m<ipython-input-39-1b36ff78cba8>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m base_model = AutoModelForCausalLM.from_pretrained(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mscript_args\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mquantization_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbnb_config\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mdevice_map\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"auto\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# {\"\": Accelerator().local_process_index},\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mtrust_remote_code\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 559\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregister\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_class\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexist_ok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 561\u001b[0;31m return model_class.from_pretrained(\n\u001b[0m\u001b[1;32m 562\u001b[0m \u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mmodel_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhub_kwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 563\u001b[0m )\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3418\u001b[0m }\n\u001b[1;32m 3419\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"cpu\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdevice_map_without_lm_head\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m\"disk\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdevice_map_without_lm_head\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3420\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 3421\u001b[0m \"\"\"\n\u001b[1;32m 3422\u001b[0m \u001b[0mSome\u001b[0m \u001b[0mmodules\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdispatched\u001b[0m \u001b[0mon\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mCPU\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mdisk\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mMake\u001b[0m \u001b[0msure\u001b[0m \u001b[0myou\u001b[0m \u001b[0mhave\u001b[0m \u001b[0menough\u001b[0m \u001b[0mGPU\u001b[0m \u001b[0mRAM\u001b[0m \u001b[0mto\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mValueError\u001b[0m: \n Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit\n the quantized model. If you want to dispatch the model on the CPU or the disk while keeping\n these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom\n `device_map` to `from_pretrained`. Check\n https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu\n for more details.\n "]}],"source":["base_model = AutoModelForCausalLM.from_pretrained(\n"," script_args.model_name,\n"," quantization_config=bnb_config,\n"," device_map=\"auto\", # {\"\": Accelerator().local_process_index},\n"," trust_remote_code=True,\n"," use_auth_token=True,\n"," cache_dir=script_args.cache_dir,\n",")\n","base_model.config.use_cache = False"],"id":"39db2ee4-23c8-471f-89b2-bca34964bf81"},{"cell_type":"code","execution_count":null,"metadata":{"id":"b0b75ca4-730d-4bde-88bb-a86462a76d52"},"outputs":[],"source":["tokenizer = AutoTokenizer.from_pretrained(\n"," script_args.model_name,\n"," trust_remote_code=True,\n"," cache_dir=script_args.cache_dir,\n",")\n","\n","if getattr(tokenizer, \"pad_token\", None) is None:\n"," tokenizer.pad_token = tokenizer.eos_token\n","tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n","\n","tokenizer.add_special_tokens(dict(bos_token='<s>'))\n","\n","trained_model.config.pad_token_id = tokenizer.pad_token_id\n","trained_model.config.bos_token_id = tokenizer.bos_token_id"],"id":"b0b75ca4-730d-4bde-88bb-a86462a76d52"},{"cell_type":"markdown","metadata":{"id":"X1tRCa4EiYXp"},"source":["์ถ๋ก ๊ณผ์ ์์๋ GPU ๋ฉ๋ชจ๋ฆฌ๋ฅผ ์ฝ 5.5 GB ํ์ฉ"],"id":"X1tRCa4EiYXp"},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true,"base_uri":"https://localhost:8080/"},"id":"e374555b-9f8a-4617-8ea7-c1e6ee1b2999","outputId":"526d2827-6422-4399-d7ed-107b822b2bb2"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n"," warnings.warn(\n"]},{"output_type":"stream","name":"stdout","text":["- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:์ผ์ ์งฌ๋ฝ, ์ต์
:๊ณฑ๋ฐฐ๊ธฐ, ์๋:ํ๋<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:์ฌ์ฒ ํ์์ก, ์ต์
:์ค์ง, ์๋:ํ๊ทธ๋ฆ\n","- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:์์ด์ค์๋ฉ๋ฆฌ์นด๋
ธ,์ต์
:ํจ์ฌ์ด์ฆ,์๋:ํ์<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:๋ธ๊ธฐ์ค๋ฌด๋,์๋:ํ์<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 2: ์์๋ช
:์ฝ๋๋ธ๋ฃจ๋ผ๋ผ,์๋:ํ๋\n","- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:์ฐธ์ด์ฌ,์๋:ํ๋ณ<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:์ฝ์นด์ฝ๋ผ,์ต์
:1.5๋ฆฌํฐ,์๋:ํ๋ณ<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 2: ์์๋ช
:ํ
์ฌ๋ผ,์๋:ํ๋ณ\n","- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:๊ผฌ๋ง๋ฌด์นจ, ์๋:1์ธ๋ถ<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:๋ญ๋๋ฆฌํ, ์ต์
:์ค์<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 2: ์์๋ช
:์์ฃผ, ์๋:ํ๋ณ\n","- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:๊น์น์ฐ๊ฐ, ์๋:3์ธ๋ถ<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:๊ณ๋๋ง์ด\n","- ๋ถ์ ๊ฒฐ๊ณผ 0: ์์๋ช
:๋ถ๊ณ ๊ธฐ๋ฒ๊ฑฐ์ธํธ, ์๋:1๊ฐ<[!newline]>- ๋ถ์ ๊ฒฐ๊ณผ 1: ์์๋ช
:๊ฐ์ํ๊น, ์๋:์ถ๊ฐ\n","- ๋ถ์ ๊ฒฐ๊ณผ 0: "]}],"source":["eval_dic = {i:wrapper_generate(model=trained_model, do_stream=True, input_prompt=prompt_template.format(System=default_system_msg, User=evaluation_queries[i]))for i, query in enumerate(evaluation_queries)}"],"id":"e374555b-9f8a-4617-8ea7-c1e6ee1b2999"},{"cell_type":"code","execution_count":null,"metadata":{"id":"5d055bb0-5e5f-4221-a634-45d903c0f3b5"},"outputs":[],"source":["print(eval_dic[0])"],"id":"5d055bb0-5e5f-4221-a634-45d903c0f3b5"}],"metadata":{"accelerator":"GPU","colab":{"provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"},"widgets":{"application/vnd.jupyter.widget-state+json":{"dbe8b80107f646fca9ce17fc6898688e":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_38d4d232d70d49dd8c3ab620e6cfb96c","IPY_MODEL_7dcd8bfea49a447390fd3d693ce473f8","IPY_MODEL_a827efea829546b7b7e5e42a465849e4","IPY_MODEL_fee5d6bf794f4cb7962ef9985fbf4348"],"layout":"IPY_MODEL_e520cbc12c7f45809976dfbfcf56dd64"}},"25bab324b2b9446bad5f3a73eed40e68":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cacc47dd52114b3caa6a0a420f748793","placeholder":"โ","style":"IPY_MODEL_435d3880497f437fbe82c5c5aea4723b","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"}},"1e5df26c96974f9e80ec411cc2efb005":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_f2c6a7c598a2446d980e5b099f8b0504","placeholder":"โ","style":"IPY_MODEL_380d699b391e443594c77e0618acc1e6","value":""}},"726bbc9eda2647089f64254e9afc18a6":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_81c738cb1572429fad029c865af5864e","style":"IPY_MODEL_1dbd9abdfd9f441a9a2a92797469029f","value":true}},"730a80d2060d4c0d9ddd2e17f2da0045":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_bdff58ba27c74f89acc6ce2fa028b322","style":"IPY_MODEL_a8d2283aa6d44f1ab1549f4311e88e2d","tooltip":""}},"cd2ea8d1f93c436c8045979227f28f39":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff6ee54fece6482fa4908c5bd6f35331","placeholder":"โ","style":"IPY_MODEL_4552475fe488474e98941eb5bc34fe1e","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"}},"e520cbc12c7f45809976dfbfcf56dd64":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"cacc47dd52114b3caa6a0a420f748793":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"435d3880497f437fbe82c5c5aea4723b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f2c6a7c598a2446d980e5b099f8b0504":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"380d699b391e443594c77e0618acc1e6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"81c738cb1572429fad029c865af5864e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1dbd9abdfd9f441a9a2a92797469029f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bdff58ba27c74f89acc6ce2fa028b322":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a8d2283aa6d44f1ab1549f4311e88e2d":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"ff6ee54fece6482fa4908c5bd6f35331":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4552475fe488474e98941eb5bc34fe1e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"349de155fbbb411b98558636e5b363e5":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_29721702addc4325b2d6578e51ad6212","placeholder":"โ","style":"IPY_MODEL_ff3d0f971a534f23928c1c9b133ade05","value":"Connecting..."}},"29721702addc4325b2d6578e51ad6212":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ff3d0f971a534f23928c1c9b133ade05":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"38d4d232d70d49dd8c3ab620e6cfb96c":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bb9ba62e3cd74e5d965fd6d7cbfffcdb","placeholder":"โ","style":"IPY_MODEL_6d01340c7ea248da9b089906ddb0743f","value":"Token is valid (permission: write)."}},"7dcd8bfea49a447390fd3d693ce473f8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_520fd7520fe4457f88e1e7bdcbff3e99","placeholder":"โ","style":"IPY_MODEL_66775e202d174977937a2bb33552e08d","value":"Your token has been saved in your configured git credential helpers (store)."}},"a827efea829546b7b7e5e42a465849e4":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ab2576b47a964778a4fb23a0177c2372","placeholder":"โ","style":"IPY_MODEL_a99d5e99af0748a289fa755b80c2ceaf","value":"Your token has been saved to /root/.cache/huggingface/token"}},"fee5d6bf794f4cb7962ef9985fbf4348":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_129d75c4582a42b98245c5a79ea22525","placeholder":"โ","style":"IPY_MODEL_92fdf3c90389449595e1d7b3605f6953","value":"Login successful"}},"bb9ba62e3cd74e5d965fd6d7cbfffcdb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6d01340c7ea248da9b089906ddb0743f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"520fd7520fe4457f88e1e7bdcbff3e99":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"66775e202d174977937a2bb33552e08d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ab2576b47a964778a4fb23a0177c2372":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a99d5e99af0748a289fa755b80c2ceaf":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"129d75c4582a42b98245c5a79ea22525":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"92fdf3c90389449595e1d7b3605f6953":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b2e8914a604a4cd7a8160a247b46897e":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_80b1d408c82c4a16b237c9ca6ff853a9","IPY_MODEL_9ae5d008fbdb49e793eeca063f8a9b79","IPY_MODEL_e0f4a69b292d4821b24b1e0f8c85d994"],"layout":"IPY_MODEL_ac74b0890fdb4386a50184258f6efea6"}},"80b1d408c82c4a16b237c9ca6ff853a9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_329a5858a60f4140b693ad2d40f2666c","placeholder":"โ","style":"IPY_MODEL_ed062bc006874d5a975c048bf1b49111","value":"Loading checkpoint shards: 100%"}},"9ae5d008fbdb49e793eeca063f8a9b79":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0405581206a04b8f9e462b4a97a9b396","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d39c0747d6da4f9095fb300b7ecdee14","value":2}},"e0f4a69b292d4821b24b1e0f8c85d994":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_35a047ec6fdd44df851354380808b081","placeholder":"โ","style":"IPY_MODEL_51396a17ef894a3dbddbcc21f59e6fe9","value":" 2/2 [01:19<00:00, 38.18s/it]"}},"ac74b0890fdb4386a50184258f6efea6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"329a5858a60f4140b693ad2d40f2666c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ed062bc006874d5a975c048bf1b49111":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0405581206a04b8f9e462b4a97a9b396":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d39c0747d6da4f9095fb300b7ecdee14":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"35a047ec6fdd44df851354380808b081":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"51396a17ef894a3dbddbcc21f59e6fe9":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":5}
|
lora-midm-7b-food-order-understanding-20231211T145454Z-001.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f74d33cf7c67bd00d5fef90a7113947e08c2c0ff47a1ae7129fc4c006292c5e
|
3 |
+
size 247885449
|