{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "provenance": [] },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "UXKT8SDQQ1tI" },
      "outputs": [],
      "source": [
        "%%capture\n",
        "# Install Unsloth, picking the wheel that matches the GPU's compute capability.\n",
        "# %pip (not !pip) guarantees the install targets this kernel's environment.\n",
        "import torch\n",
        "import re\n",
        "from pprint import pprint\n",
        "\n",
        "major_version, minor_version = torch.cuda.get_device_capability()\n",
        "if major_version >= 8:\n",
        "    # Newer GPUs: Ampere/Hopper (RTX 30xx, RTX 40xx, A100, H100, L40)\n",
        "    %pip install \"unsloth[colab-ampere] @ git+https://github.com/unslothai/unsloth.git\"\n",
        "else:\n",
        "    # Older GPUs (V100, Tesla T4, RTX 20xx)\n",
        "    %pip install \"unsloth[colab] @ git+https://github.com/unslothai/unsloth.git\""
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from unsloth import FastLanguageModel\n",
        "import torch\n",
        "\n",
        "# Configuration -- everything a reader might tune lives here.\n",
        "max_seq_length = 2048  # choose any; Unsloth auto-supports RoPE scaling internally\n",
        "dtype = None  # None = auto-detect; float16 for Tesla T4/V100, bfloat16 for Ampere+\n",
        "load_in_4bit = True  # 4-bit quantization to cut memory use"
      ],
      "metadata": { "id": "Q6gVomWzQ7hU" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the fine-tuned model and enable Unsloth's fast inference path.\n",
        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
        "    model_name = \"neuralwebtech/mental_health_counseling_gemma_7b_4bit_q\",  # YOUR MODEL YOU USED FOR TRAINING\n",
        "    max_seq_length = max_seq_length,\n",
        "    dtype = dtype,\n",
        "    load_in_4bit = load_in_4bit,\n",
        ")\n",
        "FastLanguageModel.for_inference(model)  # Enable native 2x faster inference\n",
        "\n",
        "# NOTE: this template must match the format used at training time exactly.\n",
        "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context.\n",
        " Write a response that appropriately completes the request.\n",
        "\n",
        "### Context:\n",
        "{}\n",
        "\n",
        "### Response:\n",
        "{}\"\"\""
      ],
      "metadata": { "id": "_ItV-FhgRC5t" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Context to send to the counseling model -- replace with your own text.\n",
        "# (Previously `text` was never defined, so Restart & Run All raised NameError.)\n",
        "text = \"I feel anxious all the time and I don't know why.\"\n",
        "\n",
        "inputs = tokenizer(\n",
        "    [\n",
        "        alpaca_prompt.format(\n",
        "            text,  # context / instruction\n",
        "            \"\",    # response -- leave blank for generation\n",
        "        )\n",
        "    ],\n",
        "    return_tensors = \"pt\",\n",
        ").to(\"cuda\")\n",
        "\n",
        "outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)\n",
        "final_out = tokenizer.batch_decode(outputs)"
      ],
      "metadata": { "id": "8eTx88KiRDiL" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def print_response(lines):\n",
        "    \"\"\"Extract the generated answer from decoded model output.\n",
        "\n",
        "    Args:\n",
        "        lines: list of decoded strings (e.g. from tokenizer.batch_decode).\n",
        "\n",
        "    Returns:\n",
        "        Everything after the '### Response:' marker, or 'No response'\n",
        "        if the marker is absent.\n",
        "    \"\"\"\n",
        "    text = '\\n'.join(lines)\n",
        "    # re.DOTALL lets '.' cross newlines so multi-line generations\n",
        "    # are not truncated to their first line.\n",
        "    response_match = re.search(r'### Response:\\s*(.*)', text, re.DOTALL)\n",
        "    if response_match:\n",
        "        return response_match.group(1)\n",
        "    return \"No response\""
      ],
      "metadata": { "id": "z5s-5_0MRHPt" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "pprint(print_response(final_out))"
      ],
      "metadata": { "id": "_DlE2xjBRHUk" },
      "execution_count": null,
      "outputs": []
    }
  ]
}