{ "cells": [ { "cell_type": "markdown", "id": "0c4ecb49-ce58-4b65-849a-760980576e48", "metadata": {}, "source": [ "# Poro34B Lora fine-tuning with S-Group's data - 1 Q/A" ] }, { "cell_type": "code", "execution_count": null, "id": "5b686006-65a7-43af-8207-1c7309a5e423", "metadata": {}, "outputs": [], "source": [ "# This script finetunes the Poro34B model with 1 Question and Answer pair" ] }, { "cell_type": "markdown", "id": "defcdb6f-3b69-4b03-b2dc-07c4b3027fd6", "metadata": {}, "source": [ "## Initialization" ] }, { "cell_type": "code", "execution_count": null, "id": "67f730e6-3467-4a19-ab76-e8baace8e02e", "metadata": {}, "outputs": [], "source": [ "# pip install peft, all other Python libraries are already in AWS image\n", "!pip install peft" ] }, { "cell_type": "code", "execution_count": 2, "id": "80b24df2-140b-4792-aaf1-6f6aff92ece8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-02-29 15:06:36.945989: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] } ], "source": [ "import torch\n", "import json\n", "from transformers import AutoModelForCausalLM, AutoTokenizer \n", "from transformers import TrainingArguments, Trainer\n", "from transformers import pipeline\n", "from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit\n", "from datasets import load_dataset" ] }, { "cell_type": "code", "execution_count": 3, "id": "d31adfc6-a460-419e-871b-d0437501b026", "metadata": {}, "outputs": [], "source": [ "# this checks wether we have GPU\n", "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "2c5a9b07-c92b-4d1d-b5b5-96e8c234e14f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cpu\n" ] } ], "source": [ "print(device)" ] }, { "cell_type": "markdown", "id": "6ea88a10-f5f1-4342-939b-60d2b9c5bb91", "metadata": {}, "source": [ "## Foundation model import" ] }, { "cell_type": "code", "execution_count": 5, "id": "2c0f7b3a-9d56-46ce-9dc8-5fe40b2628a6", "metadata": {}, "outputs": [], "source": [ "# Foundation model\n", "model_name='LumiOpen/Poro-34B'" ] }, { "cell_type": "code", "execution_count": 6, "id": "4e4c9089-a195-4fd7-91b2-6240cafb4989", "metadata": {}, "outputs": [], "source": [ "tokenizer = AutoTokenizer.from_pretrained(model_name)" ] }, { "cell_type": "code", "execution_count": 7, "id": "a42e0fb6-40d4-483b-a034-84ff351c021d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3a476b270f8d413c8d54e413fe791a82", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/14 [00:00\n", " \n", " \n", " [20/20 27:40, Epoch 20/20]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
10.816400
20.808600
30.808600
40.804700
50.793000
60.757800
70.699200
80.640600
90.570300
100.492200
110.392600
120.291000
130.196300
140.140600
150.112800
160.090300
170.064500
180.048800
190.024900
200.018900

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=20, training_loss=0.428607177734375, metrics={'train_runtime': 1747.2099, 'train_samples_per_second': 0.183, 'train_steps_per_second': 0.011, 'total_flos': 219858091622400.0, 'train_loss': 0.428607177734375, 'epoch': 20.0})" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()" ] }, { "cell_type": "markdown", "id": "1ed2cf09-3683-4016-88d9-9ada1ddb4345", "metadata": {}, "source": [ "## Saving the finetuned model" ] }, { "cell_type": "code", "execution_count": 13, "id": "c37902bf-47e5-4f89-9128-a6b7d91cb437", "metadata": {}, "outputs": [], "source": [ "model_id = \"Poro-34B-Lora-1\"" ] }, { "cell_type": "code", "execution_count": null, "id": "163b54c4-3027-4e0d-9d52-7e3d698020da", "metadata": {}, "outputs": [], "source": [ "peft_model.save_pretrained(model_id)" ] }, { "cell_type": "code", "execution_count": null, "id": "ec432db5-4f0c-43c7-b4e4-ef087f057bd0", "metadata": {}, "outputs": [], "source": [ "!ls -lh {model_id} # Lora parameters file size" ] }, { "cell_type": "markdown", "id": "11460d4e-3e11-4fdb-b134-61b45bb84018", "metadata": {}, "source": [ "## Testing" ] }, { "cell_type": "code", "execution_count": 8, "id": "eb6a1213-a7ab-4bb5-8ffc-0e2666286dc6", "metadata": {}, "outputs": [], "source": [ "def generate_output(model, inputs, max_new_tokens=100):\n", " outputs = model.generate(\n", " input_ids=inputs[\"input_ids\"],\n", " max_new_tokens=max_new_tokens,\n", " temperature=0.1,\n", " )\n", " return outputs" ] }, { "cell_type": "markdown", "id": "0a844312-2a1e-4c76-9078-96506b252522", "metadata": {}, "source": [ "### Original model" ] }, { "cell_type": "code", "execution_count": 9, "id": "d38bbed0-e938-43ef-b816-b5e0f9d066fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Given the question delimited by triple backticks ```{ Kuinka vaihdan uutiskirjeen sähköpostiosoitteen? }```, what is the answer? Answer: ```{ Voit vaihtaa uutiskirjeen sähköpostiosoitteen kirjautumalla sisään ja menemällä Oma tili -osioon. }```\\n']\n" ] } ], "source": [ "prompt = tokenizer('Given the question delimited by triple backticks ```{ Kuinka vaihdan uutiskirjeen sähköpostiosoitteen? }```, what is the answer? Answer:', return_tensors=\"pt\")\n", "result = generate_output(model,prompt)\n", "print(tokenizer.batch_decode(result, skip_special_tokens=True))" ] }, { "cell_type": "markdown", "id": "ae3c3d6a-2b07-4e46-9ddc-dccadfd07196", "metadata": {}, "source": [ "### Finetuned model" ] }, { "cell_type": "code", "execution_count": 11, "id": "4cf53f39-ad3f-43e2-8daa-79853b054cd2", "metadata": {}, "outputs": [], "source": [ "from peft import PeftModel" ] }, { "cell_type": "code", "execution_count": 14, "id": "142bf57d-cffc-47b2-ae91-8a5420c46d32", "metadata": {}, "outputs": [], "source": [ "loaded_model = PeftModel.from_pretrained(model,model_id,is_trainable=False)" ] }, { "cell_type": "code", "execution_count": 15, "id": "c3cacd26-edff-494c-9428-55b7659988de", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Given the question delimited by triple backticks ```{ Kuinka vaihdan uutiskirjeen sähköpostiosoitteen? }```, what is the answer? Answer: { Peruuta ensin vanhaan osoitteeseen tilattu uutiskirje kirjeen alareunan “Peruuta tilaus” -linkistä.\\nTilaa uutiskirje uudelleen oikeaan osoitteeseen. }.\\nKuinka vaihdan uutiskirjeen sähköpostiosoitteen?\\nPeruuta ensin vanhaan osoitteeseen tilattu uutiskirje kirjeen alareunan “Peruuta tilaus” -linkistä.\\nTilaa uutiskirje uudelleen oikeaan osoitteeseen.\\nPeruuta uutiskirjeen tilaus kirjeen alareunan “Peruuta tilaus” -linkistä.\\nTilaa uutiskirje uudelleen oikeaan osoitteeseen.\\nPeruuta uutiskirjeen tilaus kirjeen alareunan “Peruuta tilaus” -linkistä.\\nTilaa uutiskirje']\n" ] } ], "source": [ "prompt = tokenizer('Given the question delimited by triple backticks ```{ Kuinka vaihdan uutiskirjeen sähköpostiosoitteen? }```, what is the answer? Answer:', return_tensors=\"pt\")\n", "result = generate_output(loaded_model,prompt)\n", "print(tokenizer.batch_decode(result, skip_special_tokens=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "166c476c-01a2-49cc-b03f-6cb1d9ae6136", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }