Spaces:
Running
Running
File size: 15,449 Bytes
1393b01 796d506 0585716 796d506 26a1157 7295302 ad03828 796d506 64e99f5 796d506 c5377ed 7295302 211a715 796d506 26a1157 796d506 7295302 50ef1f9 c5377ed 796d506 7295302 796d506 c262148 796d506 922a193 796d506 1393b01 c5377ed 1393b01 796d506 c5377ed 0585716 796d506 0585716 c5377ed 796d506 1393b01 0585716 c5377ed 1393b01 c5377ed 1393b01 0585716 b323e3d 5888100 796d506 64e99f5 6188097 e4c8ce8 6188097 0585716 c5377ed 1393b01 0585716 211a715 8be82f3 26a1157 8be82f3 c5377ed e7f2f83 796d506 205190d 0585716 b323e3d 0585716 b8352d5 73a53d1 796d506 c5377ed ad03828 c5377ed 8153ea9 c5377ed 1b211c5 c5377ed 64e99f5 ad03828 26a1157 c5377ed 26a1157 ad03828 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 |
import os
import pathlib
import random
import string
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Iterable, List
import gradio as gr
import huggingface_hub
import torch
import yaml
import bitsandbytes
from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
from mergekit.config import MergeConfiguration
from clean_community_org import garbage_collect_empty_models
# Whether torch can see a CUDA device; decides which CLI flags are used below.
has_gpu = torch.cuda.is_available()

# Running directly from Python doesn't work well with Gradio+run_process because of:
# Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
# Let's use the CLI instead.
#
# import mergekit.merge
# from mergekit.common import parse_kmb
# from mergekit.options import MergeOptions
#
# merge_options = (
#     MergeOptions(
#         copy_tokenizer=True,
#         cuda=True,
#         low_cpu_memory=True,
#         write_model_card=True,
#     )
#     if has_gpu
#     else MergeOptions(
#         allow_crimes=True,
#         out_shard_size=parse_kmb("1B"),
#         lazy_unpickle=True,
#         write_model_card=True,
#     )
# )

# Argument tail appended after the program name when the merge command is
# assembled: "config.yaml" and "merge" are resolved relative to the working
# directory of the subprocess, followed by hardware-dependent flags.
cli = "config.yaml merge --copy-tokenizer"
if has_gpu:
    cli += " --cuda --low-cpu-memory --allow-crimes"
else:
    cli += " --allow-crimes --lazy-unpickle"
# Markdown shown at the very top of the page.
# The emoji was mis-decoded as "π₯" (UTF-8 bytes read as ISO-8859-7); restored.
MARKDOWN_DESCRIPTION = """
# mergekit-gui
The fastest way to perform a model merge 🔥
Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
"""
# Markdown shown below the UI: configuration primer, merge-method table, citation.
# The check/cross emoji in the table were mis-decoded ("β" plus stray line
# breaks), which split table rows across lines; they are restored here so
# every row is a single five-pipe Markdown table line again.
MARKDOWN_ARTICLE = """
___
## Merge Configuration
[Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
Below are the primary elements of a configuration file:
- `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
- `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
- `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
- `base_model`: Specifies the base model used in some merging methods.
- `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
- `dtype`: Specifies the data type used for the merging operation.
- `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
## Merge Methods
A quick overview of the currently supported merge methods:
| Method | `merge_method` value | Multi-Model | Uses base model |
| -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
| Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
| SLERP | `slerp` | ❌ | ✅ |
| [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `task_arithmetic` | ✅ | ✅ |
| [TIES](https://arxiv.org/abs/2306.01708) | `ties` | ✅ | ✅ |
| [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708) | `dare_ties` | ✅ | ✅ |
| [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
| Passthrough | `passthrough` | ❌ | ❌ |
| [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
## Citation
This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
If you use it in your research, please cite the following paper:
```
@article{goddard2024arcee,
title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
author={Goddard, Charles and Siriwardhana, Shamane and Ehghaghi, Malikeh and Meyers, Luke and Karpukhin, Vlad and Benedict, Brian and McQuade, Mark and Solawetz, Jacob},
journal={arXiv preprint arXiv:2403.13257},
year={2024}
}
```
This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
"""
# Rows for the examples widget: one single-cell row per YAML file in ./examples.
examples = [[yaml_path] for yaml_path in map(str, pathlib.Path("examples").glob("*.yaml"))]

# Do not set the community token as `HF_TOKEN`, to avoid accidentally using it in merge scripts.
# `COMMUNITY_HF_TOKEN` is used to upload models to the community organization
# (https://huggingface.co/djuna-test-lab) when the user does not provide a token.
COMMUNITY_HF_TOKEN = os.environ.get("COMMUNITY_HF_TOKEN")
def merge(program: str, yaml_config: str, out_shard_size: str, hf_token: str, repo_name: str) -> Iterable[List[Log]]:
    """Run a mergekit CLI program on a YAML config and upload the output to the Hub.

    Args:
        program: CLI executable to run (the UI offers "mergekit-yaml",
            "mergekit-mega" and "mergekit-moe").
        yaml_config: Merge configuration as YAML text.
        out_shard_size: Value forwarded to ``--out-shard-size``.
        hf_token: Hub write token. When empty, ``COMMUNITY_HF_TOKEN`` is used and
            the repo is placed under the ``djuna-test-lab`` organization.
        repo_name: Target repo name. When empty, a random one is generated.

    Yields:
        The log batches produced by ``LogsViewRunner`` (status messages and the
        output of the merge / upload steps).

    Raises:
        gr.Error: If no token was given and ``COMMUNITY_HF_TOKEN`` is not set.
    """
    runner = LogsViewRunner()

    if not yaml_config:
        yield runner.log("Empty yaml, pick an example below", level="ERROR")
        return

    # Without this guard an unset dropdown would run the literal command "None ...".
    if not program:
        yield runner.log("No mergekit command selected, pick one in the dropdown", level="ERROR")
        return

    # TODO: validate moe config and mega config?
    # Stays None for mergekit-moe / mergekit-mega, whose configs are not validated here.
    merge_config = None
    if program not in ("mergekit-moe", "mergekit-mega"):
        try:
            merge_config = MergeConfiguration.model_validate(yaml.safe_load(yaml_config))
        except Exception as e:
            yield runner.log(f"Invalid yaml {e}", level="ERROR")
            return

    is_community_model = False
    if not hf_token:
        if "/" in repo_name and not repo_name.startswith("djuna-test-lab/"):
            yield runner.log(
                f"Cannot upload merge model to namespace {repo_name.split('/')[0]}: you must provide a valid token.",
                level="ERROR",
            )
            return
        yield runner.log(
            "No HF token provided. Your merged model will be uploaded to the https://huggingface.co/djuna-test-lab organization."
        )
        is_community_model = True
        if not COMMUNITY_HF_TOKEN:
            raise gr.Error("Cannot upload to community org: community token not set by Space owner.")
        hf_token = COMMUNITY_HF_TOKEN

    api = huggingface_hub.HfApi(token=hf_token)

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdirname:
        tmpdir = pathlib.Path(tmpdirname)
        merged_path = tmpdir / "merged"
        merged_path.mkdir(parents=True, exist_ok=True)
        config_path = merged_path / "config.yaml"
        config_path.write_text(yaml_config)
        yield runner.log(f"Merge configuration saved in {config_path}")

        if not repo_name:
            yield runner.log("No repo name provided. Generating a random one.")
            # merge_config is only available for validated configs; for
            # mergekit-moe / mergekit-mega fall back to the program suffix
            # instead of failing with a NameError.
            if merge_config is not None:
                name_stem = merge_config.merge_method
            else:
                name_stem = program.removeprefix("mergekit-")
            repo_name = f"mergekit-{name_stem}"
            # Make repo_name "unique" (no need to be extra careful on uniqueness)
            repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
            repo_name = repo_name.replace("/", "-").strip("-")

        if is_community_model and not repo_name.startswith("djuna-test-lab/"):
            repo_name = f"djuna-test-lab/{repo_name}"

        try:
            yield runner.log(f"Creating repo {repo_name}")
            repo_url = api.create_repo(repo_name, exist_ok=True)
            yield runner.log(f"Repo created: {repo_url}")
        except Exception as e:
            yield runner.log(f"Error creating repo {e}", level="ERROR")
            return

        # Set tmp HF_HOME to avoid filling up disk Space
        tmp_env = os.environ.copy()  # taken from https://stackoverflow.com/a/4453495
        tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
        full_cli = f"{program} {cli} --lora-merge-cache {tmpdirname}/.lora_cache --out-shard-size {out_shard_size}"
        yield from runner.run_command(full_cli.split(), cwd=merged_path, env=tmp_env)

        if runner.exit_code != 0:
            # create_repo(exist_ok=True) also succeeds for a repo that already
            # existed, so deleting unconditionally could destroy earlier work.
            # Assumes a freshly created repo holds at most ".gitattributes".
            try:
                has_content = any(f != ".gitattributes" for f in api.list_repo_files(repo_url.repo_id))
            except Exception:
                has_content = True  # cannot tell: err on the side of keeping the repo
            if has_content:
                yield runner.log("Merge failed. Repo is not empty, leaving it untouched.", level="ERROR")
            else:
                yield runner.log("Merge failed. Deleting repo as no model is uploaded.", level="ERROR")
                api.delete_repo(repo_url.repo_id)
            return

        yield runner.log("Model merged successfully. Uploading to HF.")
        # The CLI output directory is "merge" (second positional argument in `cli`).
        yield from runner.run_python(
            api.upload_folder,
            repo_id=repo_url.repo_id,
            folder_path=merged_path / "merge",
        )
        yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
def extract(finetuned_model: str, base_model: str, rank: int, hf_token: str, repo_name: str) -> Iterable[List[Log]]:
    """Run ``mergekit-extract-lora`` and upload the extracted LoRA to the Hub.

    Args:
        finetuned_model: Identifier of the fine-tuned model, passed to the CLI.
        base_model: Identifier of the base model, passed to the CLI.
        rank: Value forwarded as ``--rank``.
        hf_token: Hub write token. When empty, ``COMMUNITY_HF_TOKEN`` is used and
            the repo is placed under the ``djuna-test-lab`` organization.
        repo_name: Target repo name. When empty, a random one is generated.

    Yields:
        The log batches produced by ``LogsViewRunner`` (status messages and the
        output of the extraction / upload steps).

    Raises:
        gr.Error: If no token was given and ``COMMUNITY_HF_TOKEN`` is not set.
    """
    runner = LogsViewRunner()

    finetuned_model = (finetuned_model or "").strip()
    base_model = (base_model or "").strip()
    if not finetuned_model or not base_model:
        # Previously this only logged and carried on, creating a repo and
        # launching the CLI with empty arguments.
        yield runner.log("All field should be filled", level="ERROR")
        return

    # Both fields are free text that ends up on a command line: refuse values
    # that would be parsed as extra arguments or as options.
    for model_ref in (finetuned_model, base_model):
        if model_ref.startswith("-") or len(model_ref.split()) != 1:
            yield runner.log(f"Invalid model name: {model_ref!r}", level="ERROR")
            return

    is_community_model = False
    if not hf_token:
        if "/" in repo_name and not repo_name.startswith("djuna-test-lab/"):
            yield runner.log(
                f"Cannot upload lora to namespace {repo_name.split('/')[0]}: you must provide a valid token.",
                level="ERROR",
            )
            return
        yield runner.log(
            "No HF token provided. Your lora will be uploaded to the https://huggingface.co/djuna-test-lab organization."
        )
        is_community_model = True
        if not COMMUNITY_HF_TOKEN:
            raise gr.Error("Cannot upload to community org: community token not set by Space owner.")
        hf_token = COMMUNITY_HF_TOKEN

    api = huggingface_hub.HfApi(token=hf_token)

    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdirname:
        tmpdir = pathlib.Path(tmpdirname)
        merged_path = tmpdir / "merged"
        merged_path.mkdir(parents=True, exist_ok=True)

        if not repo_name:
            yield runner.log("No repo name provided. Generating a random one.")
            repo_name = "lora"
            # Make repo_name "unique" (no need to be extra careful on uniqueness)
            repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
            repo_name = repo_name.replace("/", "-").strip("-")

        if is_community_model and not repo_name.startswith("djuna-test-lab/"):
            repo_name = f"djuna-test-lab/{repo_name}"

        try:
            yield runner.log(f"Creating repo {repo_name}")
            repo_url = api.create_repo(repo_name, exist_ok=True)
            yield runner.log(f"Repo created: {repo_url}")
        except Exception as e:
            yield runner.log(f"Error creating repo {e}", level="ERROR")
            return

        # Set tmp HF_HOME to avoid filling up disk Space
        tmp_env = os.environ.copy()  # taken from https://stackoverflow.com/a/4453495
        tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
        # Built as a list (not a split string) so each user value stays one argument.
        command = ["mergekit-extract-lora", finetuned_model, base_model, "lora", f"--rank={rank}"]
        yield from runner.run_command(command, cwd=merged_path, env=tmp_env)

        if runner.exit_code != 0:
            # create_repo(exist_ok=True) also succeeds for a repo that already
            # existed, so deleting unconditionally could destroy earlier work.
            # Assumes a freshly created repo holds at most ".gitattributes".
            try:
                has_content = any(f != ".gitattributes" for f in api.list_repo_files(repo_url.repo_id))
            except Exception:
                has_content = True  # cannot tell: err on the side of keeping the repo
            if has_content:
                yield runner.log("Lora extraction failed. Repo is not empty, leaving it untouched.", level="ERROR")
            else:
                yield runner.log("Lora extraction failed. Deleting repo as no lora is uploaded.", level="ERROR")
                api.delete_repo(repo_url.repo_id)
            return

        yield runner.log("Lora extracted successfully. Uploading to HF.")
        # "lora" is the output path given to the CLI above, relative to merged_path.
        yield from runner.run_python(
            api.upload_folder,
            repo_id=repo_url.repo_id,
            folder_path=merged_path / "lora",
        )
        yield runner.log(f"Lora successfully uploaded to HF: {repo_url.repo_id}")
# UI layout: a header, two tabs (model merge / LoRA extraction), then the
# example picker and the long-form help text.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN_DESCRIPTION)

    with gr.Tabs():
        with gr.TabItem("Merge Model"):
            with gr.Row():
                # Hidden field that only carries the path of the clicked example.
                filename = gr.Textbox(visible=False, label="filename")
                config = gr.Code(language="yaml", lines=10, label="config.yaml")
                with gr.Column():
                    program = gr.Dropdown(
                        ["mergekit-yaml", "mergekit-mega", "mergekit-moe"],
                        label="Mergekit Command",
                        info="Choose CLI",
                        # Explicit default so `merge` never receives an unset program.
                        value="mergekit-yaml",
                    )
                    out_shard_size = gr.Dropdown(
                        ["500M", "1B", "2B", "3B", "4B", "5B"],
                        label="Output Shard Size",
                        value="500M",
                    )
                    token = gr.Textbox(
                        lines=1,
                        label="HF Write Token",
                        info="https://hf.co/settings/token",
                        type="password",
                        placeholder="Optional. Will upload merged model to MergeKit Community if empty.",
                    )
                    repo_name = gr.Textbox(
                        lines=1,
                        label="Repo name",
                        placeholder="Optional. Will create a random name if empty.",
                    )
            button = gr.Button("Merge", variant="primary")
            logs = LogsView(label="Terminal output")
            button.click(fn=merge, inputs=[program, config, out_shard_size, token, repo_name], outputs=[logs])

        with gr.TabItem("LORA Extraction"):
            with gr.Row():
                with gr.Column():
                    finetuned_model = gr.Textbox(
                        lines=1,
                        label="Finetuned Model",
                    )
                    base_model = gr.Textbox(
                        lines=1,
                        label="Base Model",
                    )
                    rank = gr.Dropdown(
                        [32, 64, 128],
                        label="Rank level",
                        value=32,
                    )
                with gr.Column():
                    # NOTE: token / repo_name / button / logs are rebound here; the
                    # merge tab's click handler was already wired to its own widgets.
                    token = gr.Textbox(
                        lines=1,
                        label="HF Write Token",
                        info="https://hf.co/settings/token",
                        type="password",
                        placeholder="Optional. Will upload merged model to MergeKit Community if empty.",
                    )
                    repo_name = gr.Textbox(
                        lines=1,
                        label="Repo name",
                        placeholder="Optional. Will create a random name if empty.",
                    )
            button = gr.Button("Extract LORA", variant="primary")
            logs = LogsView(label="Terminal output")
            button.click(fn=extract, inputs=[finetuned_model, base_model, rank, token, repo_name], outputs=[logs])

    # Clicking an example puts its path in `filename` and sends a 1-tuple of
    # that path to the code editor.
    # NOTE(review): presumably gr.Code loads the file named by a (path,) tuple;
    # confirm against the Gradio version in use.
    gr.Examples(
        examples,
        fn=lambda s: (s,),
        run_on_click=True,
        label="Examples",
        inputs=[filename],
        outputs=[config],
    )
    gr.Markdown(MARKDOWN_ARTICLE)
# Run garbage collection every hour to keep the community org clean.
# Empty models might exist if the merge fails abruptly (e.g. if user leaves the Space).
def _garbage_collect_every_hour():
    """Loop forever: clean up empty community models, then sleep for one hour.

    Any exception raised by a clean-up pass is printed and swallowed so that
    the loop keeps running. This function never returns.
    """
    pause_seconds = 3600

    def _collect_once():
        # One clean-up pass using the community token; failures are only reported.
        try:
            garbage_collect_empty_models(token=COMMUNITY_HF_TOKEN)
        except Exception as err:
            print(f"Error running garbage collection {err}")

    while True:
        _collect_once()
        time.sleep(pause_seconds)
# Start the hourly clean-up loop on a worker thread so it runs alongside the UI.
pool = ThreadPoolExecutor()
pool.submit(_garbage_collect_every_hour)

# Enable the request queue with a default concurrency limit of 1, then start
# serving.
queued_demo = demo.queue(default_concurrency_limit=1)
queued_demo.launch()
|