mlconvexai
/

Poro-34B-GPTQ-SGroup

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c9399417-92ea-4474-a6cb-ce1ecf14f8ea",
+   "metadata": {},
+   "source": [
+    "# Poro 34B GPTQ quantization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8bea76a0-0cce-461e-b167-2f1b6207395e",
+   "metadata": {},
+   "source": [
+    "## Step 1: Import the Transformers and PyTorch libraries and check CUDA availability"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1ca2fc08-52ed-4ca3-b849-fbcc72df11f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "97e1ee06-325a-4ca5-8426-39ee43fd02f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "17be0537-e39a-4ad7-b29b-6f4f7d72ead7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'2.2.1+cu121'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e05ee325-ce5d-49b6-985e-c66ff88ee3e5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.cuda.is_available()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8114af7-2cdb-425f-ab8a-2d35462c2977",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "495fc0f8-ecc9-4c76-8251-2829246ee68a",
+   "metadata": {},
+   "source": [
+    "## Step 2: Load the original Poro 34B model from Hugging Face and save it locally"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a5a24fba-71e7-4192-aafc-f95648b261d4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hugging Face Hub repository id of the original model\n",
+    "model_name = \"LumiOpen/Poro-34B\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "148eeafd-6aae-440e-b30d-5ebdd1a8a4a5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c6a1ba90df9147489c1c4af10080d933",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4c407d194d1742b091947f92ad455236",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json:   0%|          | 0.00/5.64M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0392890e4392402086239952185801b1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "special_tokens_map.json:   0%|          | 0.00/545 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "org_tokenizer = AutoTokenizer.from_pretrained(model_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d3ac738-3bc6-4c4f-bfc2-81692a80a662",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "14cac38e76464e748d261f4398b62085",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/697 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "383ffbe929054df2beb163e89af423eb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors.index.json:   0%|          | 0.00/57.0k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "22f290c533504c78891a85764d4a4dee",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading shards:   0%|          | 0/14 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a52ddab3f4c04bcd968d481e53d4ce83",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00001-of-00014.safetensors:   0%|          | 0.00/4.71G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "20afc515d7e64b0193d936d8fdb17a1e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00002-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "431fbc52ce0a49219cb73664bfd1f9a6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00003-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fd7453a0ddd64b3299173e7d5586dab1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00004-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "44114f0a82144b3ebdc5e1ae083bfbf7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00005-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f8052d0df811492eb6af45897fe568d2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00006-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "787b3d7a208348f3ad0141b6c839faf1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00007-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "acc82eeb37af48088a00b9e4537d4de6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00008-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "588c9bec1e15411e929538a83a820356",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00009-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "70d44bda93724d62a069dc9c7d3abb02",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00010-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4ef2e73b8204473b868043ce8d547148",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00011-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3b463343388d42079f6f4452bb9931ce",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00012-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1eb6c9827bdc481784d3069d04f7c318",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00013-of-00014.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5662ef8d064b4fa9b70c9b1d1329fdd9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model-00014-of-00014.safetensors:   0%|          | 0.00/4.52G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7b7bd1bd16fb46068153e7b6b2f90b29",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Revision (branch) of the Hub repository to load\n",
+    "branch = \"1000B\"\n",
+    "\n",
+    "# Load the original model weights in bfloat16\n",
+    "org_model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_name,\n",
+    "    revision=branch,\n",
+    "    torch_dtype=torch.bfloat16,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "4edb55ba-908f-4070-8af3-92c7cf33f8d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_configuration = org_model.config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "69107145-6808-4add-83fe-c3577893d724",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# original model configuration is missing the sequence length parameter\n",
+    "model_configuration.sequence_length = 2048"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "33225c1e-6205-4ee0-95e9-2b15a2bf9a68",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('Poro-34B/tokenizer_config.json',\n",
+       " 'Poro-34B/special_tokens_map.json',\n",
+       " 'Poro-34B/tokenizer.json')"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Save Poro 34B locally. This is optional, but speeds things up when multiple runs are needed.\n",
+    "org_model.save_pretrained(\n",
+    "    \"Poro-34B\",\n",
+    "    safe_serialization=True,\n",
+    "    max_shard_size=\"5GB\",\n",
+    ")\n",
+    "org_tokenizer.save_pretrained(\"Poro-34B\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d28eb15-26aa-4aed-be2b-2e67dd243e92",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e3ea881-658f-41fc-b09c-df711219653d",
+   "metadata": {},
+   "source": [
+    "## Step 3: Load the fine-tuned parameters from the local Poro-34B-Lora-185 directory and merge them into the base model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c9cd3923-76ed-42d0-a882-5959cf0abf18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from peft import PeftModel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "118992d6-336e-4eb2-9392-9ca77081ece7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_id2 = \"Poro-34B-Lora-185\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "10281b50-740f-4e2f-b2cc-f2d24a7e2f77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Attach the fine-tuned adapter to the base model. The adapter is only merged in the next\n",
+    "# step and never trained, so it is loaded with PEFT's default (is_trainable=False).\n",
+    "loaded_model = PeftModel.from_pretrained(org_model, model_id2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "c4ba0fc3-1cea-4e15-b302-d6353d1e970e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fine-tuned weights are merged to original Poro 34B model\n",
+    "merged_model = loaded_model.merge_and_unload()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "07818cdc-d227-4abf-9437-8be3081aeb11",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('Poro-34B-185c/tokenizer_config.json',\n",
+       " 'Poro-34B-185c/special_tokens_map.json',\n",
+       " 'Poro-34B-185c/tokenizer.json')"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Save the merged model and its tokenizer locally\n",
+    "merged_model.save_pretrained(\n",
+    "    \"Poro-34B-185c\",\n",
+    "    safe_serialization=True,\n",
+    "    max_shard_size=\"5GB\",\n",
+    ")\n",
+    "org_tokenizer.save_pretrained(\"Poro-34B-185c\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f18349b3-2fd8-4927-9ba4-a442d6217e1b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "93d3bbdc-ceaf-4b19-b5e6-05f4d11cf275",
+   "metadata": {},
+   "source": [
+    "## Step 4: Apply GPTQ quantization to the merged fine-tuned model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e80c7627-da1f-4cb0-a079-6748806c0a0e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_id = \"Poro-34B-185c\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "2af3a06a-0d71-4c0e-aa3b-6f352c278bf2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = AutoTokenizer.from_pretrained(model_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "a302205a-0206-43b1-9bef-ed67547520f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dataset is a list of strings, we have here only one string to show the process\n",
+    "dataset = [\"Peruuta ensin vanhaan osoitteeseen tilattu uutiskirje kirjeen alareunan “Peruuta tilaus” -linkistä.\\nTilaa uutiskirje uudelleen oikeaan osoitteeseen.\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "2fabbc2e-436d-4ce1-ad89-4f30bdd977fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 4-bit GPTQ settings; the dataset and tokenizer defined above are passed along\n",
+    "gptq_config = GPTQConfig(\n",
+    "    bits=4,\n",
+    "    dataset=dataset,\n",
+    "    tokenizer=tokenizer,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b6148609-3111-40a8-b748-fc876b9869f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the merged model with the GPTQ configuration attached\n",
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_id,\n",
+    "    low_cpu_mem_usage=True,\n",
+    "    quantization_config=gptq_config,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "59a2a544-9ee1-4f4e-b26a-d91de5e8f321",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the quantized model and tokenizer locally.\n",
+    "# save_pretrained() selects the safetensors format through `safe_serialization`\n",
+    "# (`use_safetensors` is a from_pretrained() argument), matching the earlier save cells.\n",
+    "model.save_pretrained(\"Poro-34B-GPTQ-SGroup\", safe_serialization=True)\n",
+    "tokenizer.save_pretrained(\"Poro-34B-GPTQ-SGroup\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e012ca3-966a-4480-aae3-6c2b67e6dde6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Login to Huggingface\n",
+    "from huggingface_hub import notebook_login\n",
+    "notebook_login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "587afbc0-8b81-4807-bed8-6af145845b95",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Upload the quantized model and tokenizer to the Hugging Face Hub.\n",
+    "# push_to_hub() selects the safetensors format through `safe_serialization`.\n",
+    "model.push_to_hub(\"Poro-34B-GPTQ-SGroup\", safe_serialization=True)\n",
+    "tokenizer.push_to_hub(\"Poro-34B-GPTQ-SGroup\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f02d72bb-e75b-415f-b791-254246c5f971",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df85b2dc-22e0-40da-b1c1-fee4095c31be",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "802bf734-e951-4aa0-9512-99ff7bf952f9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f287018e-07b4-4080-b286-e905059f2f90",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6baf3c59-611b-47e0-9737-d25952c98c70",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "84f6aef7-ff91-4bac-add3-3e3c6b4667ca",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8feaaca1-b05e-4e4e-97ce-18931d908eb7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}