{ "cells": [ { "cell_type": "markdown", "id": "c9399417-92ea-4474-a6cb-ce1ecf14f8ea", "metadata": {}, "source": [ "# Poro 34B GPTQ quantization" ] }, { "cell_type": "markdown", "id": "8bea76a0-0cce-461e-b167-2f1b6207395e", "metadata": {}, "source": [ "## Step 1: Import the transformers and torch libraries and check CUDA availability" ] }, { "cell_type": "code", "execution_count": 1, "id": "1ca2fc08-52ed-4ca3-b849-fbcc72df11f6", "metadata": {}, "outputs": [], "source": [ "from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig" ] }, { "cell_type": "code", "execution_count": 2, "id": "97e1ee06-325a-4ca5-8426-39ee43fd02f1", "metadata": {}, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "code", "execution_count": 4, "id": "17be0537-e39a-4ad7-b29b-6f4f7d72ead7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.2.1+cu121'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.__version__" ] }, { "cell_type": "code", "execution_count": 5, "id": "e05ee325-ce5d-49b6-985e-c66ff88ee3e5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.cuda.is_available()" ] }, { "cell_type": "code", "execution_count": null, "id": "c8114af7-2cdb-425f-ab8a-2d35462c2977", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "495fc0f8-ecc9-4c76-8251-2829246ee68a", "metadata": {}, "source": [ "## Step 2: Load the original Poro 34B model from Hugging Face and save it locally" ] }, { "cell_type": "code", "execution_count": 3, "id": "a5a24fba-71e7-4192-aafc-f95648b261d4", "metadata": {}, "outputs": [], "source": [ "model_name='LumiOpen/Poro-34B'" ] }, { "cell_type": "code", "execution_count": 4, "id": "148eeafd-6aae-440e-b30d-5ebdd1a8a4a5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c6a1ba90df9147489c1c4af10080d933", 
"version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/286 [00:00