{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "JHRpOZ5g3Flv"
   },
   "source": [
    "# Clone Mergekit and Install the dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "x8548KdSbMs2"
   },
   "outputs": [],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4alsYntU1gNU"
   },
   "outputs": [],
   "source": [
    "!pip install -qqq git+https://github.com/arcee-ai/mergekit.git"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "DtGY8BAo3alb"
   },
   "source": [
    "# Mergekit Config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CmfbveTblP0F"
   },
   "outputs": [],
   "source": [
    "# @markdown What is your model's name will be?\n",
    "MODEL_NAME = 'SmolMoE' # @param {type:\"string\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "r2-rAjH93w8x"
   },
   "outputs": [],
   "source": [
    "mergekit_yaml = \"\"\"\n",
    "base_model: BEE-spoke-data/smol_llama-220M-GQA\n",
    "gate_mode: random\n",
    "dtype: bfloat16\n",
    "experts:\n",
    "  - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
    "  - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
    "\"\"\" # @param {type:\"string\"}\n",
    "with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
    "    f.write(mergekit_yaml)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WiCGZXysn_mD"
   },
   "source": [
    "# Mergekit Runtime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0scr7Ed_4GPe"
   },
   "outputs": [],
   "source": [
    "low_cpu_ram = True # @param {type:\"boolean\"}\n",
    "runtime = \"GPU\" # @param [\"CPU\", \"GPU\"]\n",
    "task = \"merge-mega\" # @param [\"merge\", \"merge-mega\", \"moe\", \"extract\"]\n",
    "# @markdown ### Mergekit arguments\n",
    "\n",
    "trust_remote_code = False # @param {type:\"boolean\"}\n",
    "clone_tensors = True # @param {type:\"boolean\"}\n",
    "low_ram = True # @param {type:\"boolean\"}\n",
    "out_shard_size = \"500M\" # @param {type:\"string\"}\n",
    "\n",
    "# @markdown ### Extract LoRA (experimental)\n",
    "base_model = \"unsloth/Llama-3.2-3B-Instruct\" # @param {type:\"string\"}\n",
    "finetuned_model = \"theprint/ReWiz-Llama-3.2-3B\" # @param {type:\"string\"}\n",
    "extract_rank = 32 # @param {type:\"number\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QBhBgX7U52Xn"
   },
   "source": [
    "## Run the program"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "id": "3Y7aBJXL54GJ"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "\n",
    "def empty_folder(folder_path):\n",
    "  if os.path.exists(folder_path):\n",
    "    shutil.rmtree(folder_path)\n",
    "    os.makedirs(folder_path)\n",
    "\n",
    "empty_folder('merge')\n",
    "empty_folder('lora')\n",
    "\n",
    "if task == \"merge\":\n",
    "    cli = \"mergekit-yaml\"\n",
    "elif task == \"merge-mega\":\n",
    "    cli = \"mergekit-mega\"\n",
    "elif task == \"moe\":\n",
    "    cli = \"mergekit-moe\"\n",
    "elif task == \"extract\":\n",
    "    if base_model == \"\" or finetuned_model == \"\":\n",
    "        raise ValueError(\"base_model and finetuned_model cannot be empty\")\n",
    "    !pip install -qqq bitsandbytes\n",
    "    cli = f\"mergekit-extract-lora {finetuned_model} {base_model} lora --rank={extract_rank}\"\n",
    "\n",
    "if task in [\"merge\", \"moe\", \"merge-mega\"]:\n",
    "    cli += \" config.yaml merge --copy-tokenizer --allow-crimes\"\n",
    "    if runtime == \"GPU\":\n",
    "        if task in [\"merge\", \"merge-mega\"]:\n",
    "            cli += \" --cuda\"\n",
    "        elif task == \"moe\":\n",
    "            cli += \" --device cuda --cuda\"\n",
    "    else:\n",
    "        cli += \" --no-cuda\"\n",
    "\n",
    "    if trust_remote_code:\n",
    "        cli += \" --trust-remote-code\"\n",
    "    if clone_tensors:\n",
    "        cli += \" --clone-tensors\"\n",
    "    if low_ram:\n",
    "        cli += f\" --out-shard-size {out_shard_size} --lazy-unpickle\"\n",
    "        if low_cpu_ram:\n",
    "            cli += \" --low-cpu-memory\"\n",
    "print(cli)\n",
    "!{cli}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "HyeGrtGrDn6S"
   },
   "source": [
    "# Inference the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wpy7Ahw6hghH"
   },
   "outputs": [],
   "source": [
    "!pip install -qU transformers bitsandbytes accelerate\n",
    "from transformers import AutoTokenizer, pipeline\n",
    "import torch\n",
    "\n",
    "model = \"merge\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model)\n",
    "generator = pipeline(\n",
    "    \"text-generation\",\n",
    "    model=model,\n",
    "    model_kwargs={\"torch_dtype\": torch.float16, \"load_in_4bit\": False},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "f05D7q8wiF-5"
   },
   "outputs": [],
   "source": [
    "messages = [{\"role\": \"user\", \"content\": \"Explain what a Mixture of Experts is in less than 100 words.\"}]\n",
    "prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
    "outputs = generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n",
    "print(outputs[0][\"generated_text\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Upload to Hugging Face"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title ## Upload model to Hugging Face { display-mode: \"form\" }\n",
    "# @markdown Enter your HF username and the name of Colab secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens).\n",
    "username = 'username' # @param {type:\"string\"}\n",
    "token_env = 'hf_token' # @param {type:\"string\"}\n",
    "\n",
    "!pip install -qU huggingface_hub\n",
    "\n",
    "import os\n"
    "import yaml\n",
    "\n",
    "from huggingface_hub import HfApi\n",
    "from google.colab import userdata\n",
    "\n",
    "def output_dir():\n",
    "    if os.path.exists('merge') and os.listdir('merge'):\n",
    "        return \"merge\"\n",
    "    if os.path.exists('lora') and os.listdir('lora'):\n",
    "        return \"lora\"\n",
    "    raise ValueError(\"Both folders are empty or do not exist.\")\n",
    "\n",
    "\n",
    "# Defined in the secrets tab in Google Colab\n",
    "api = HfApi(token=userdata.get(token_env))\n",
    "try:\n",
    "    output_dir=output_dir()\n",
    "    api.create_repo(\n",
    "        repo_id=f\"{username}/{MODEL_NAME}\",\n",
    "        repo_type=\"model\",\n",
    "        exist_ok=True,\n",
    "    )\n",
    "    api.upload_folder(\n",
    "        repo_id=f\"{username}/{MODEL_NAME}\",\n",
    "        folder_path=output_dir,\n",
    "    )\n",
    "except ValueError as e:\n",
    "    print(e)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}