Commit 738a057 by stillerman
1 Parent(s): cdc71f7

Feat: Added Gradio support (#812)

* Added gradio support
* queuing and title
* pre-commit run
- README.md +8 -0
- requirements.txt +1 -0
- src/axolotl/cli/__init__.py +88 -1
- src/axolotl/cli/inference.py +11 -3
README.md
CHANGED
@@ -97,6 +97,10 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
 # inference
 accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
     --lora_model_dir="./lora-out"
+
+# gradio
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+    --lora_model_dir="./lora-out" --gradio
 ```
 
 ## Installation
@@ -919,6 +923,10 @@ Pass the appropriate flag to the train command:
 cat /tmp/prompt.txt | python -m axolotl.cli.inference examples/your_config.yml \
   --base_model="./completed-model" --prompter=None --load_in_8bit=True
 ```
+-- With gradio hosting
+```bash
+python -m axolotl.cli.inference examples/your_config.yml --gradio
+```
 
 Please use `--sample_packing False` if you have it on and receive the error similar to below:
 
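The README additions above only cover the shell commands. For readers who prefer to drive inference from Python, here is a minimal sketch of the programmatic equivalent. It assumes the `do_cli(config, gradio=False, **kwargs)` signature introduced later in this commit and that extra keyword arguments are forwarded as config overrides; both are read off the diff below, not documented API.

```python
# Hedged sketch: programmatic equivalent of
# `accelerate launch -m axolotl.cli.inference ... --gradio`.
# Paths and the override-forwarding behaviour are assumptions; skipping
# `accelerate launch` is only reasonable for a single-device setup.
from axolotl.cli.inference import do_cli

do_cli(
    config="examples/openllama-3b/lora.yml",  # example config from the README
    lora_model_dir="./lora-out",              # assumed to be forwarded as a cfg override
    gradio=True,                              # launch the Gradio UI instead of the terminal loop
)
```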
requirements.txt
CHANGED
@@ -31,3 +31,4 @@ scikit-learn==1.2.2
 pynvml
 art
 fschat==0.2.29
+gradio
src/axolotl/cli/__init__.py
CHANGED
@@ -6,8 +6,10 @@ import os
 import random
 import sys
 from pathlib import Path
+from threading import Thread
 from typing import Any, Dict, List, Optional, Union
 
+import gradio as gr
 import torch
 import yaml
 
@@ -16,7 +18,7 @@ from accelerate.commands.config import config_args
 from art import text2art
 from huggingface_hub import HfApi
 from huggingface_hub.utils import LocalTokenNotFoundError
-from transformers import GenerationConfig, TextStreamer
+from transformers import GenerationConfig, TextIteratorStreamer, TextStreamer
 
 from axolotl.common.cli import TrainerCliArgs, load_model_and_tokenizer
 from axolotl.logging_config import configure_logging
@@ -153,6 +155,91 @@ def do_inference(
     print(tokenizer.decode(generated["sequences"].cpu().tolist()[0]))
 
 
+def do_inference_gradio(
+    *,
+    cfg: DictDefault,
+    cli_args: TrainerCliArgs,
+):
+    model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
+    prompter = cli_args.prompter
+    default_tokens = {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>"}
+
+    for token, symbol in default_tokens.items():
+        # If the token isn't already specified in the config, add it
+        if not (cfg.special_tokens and token in cfg.special_tokens):
+            tokenizer.add_special_tokens({token: symbol})
+
+    prompter_module = None
+    if prompter:
+        prompter_module = getattr(
+            importlib.import_module("axolotl.prompters"), prompter
+        )
+
+    if cfg.landmark_attention:
+        from axolotl.monkeypatch.llama_landmark_attn import set_model_mem_id
+
+        set_model_mem_id(model, tokenizer)
+        model.set_mem_cache_args(
+            max_seq_len=255, mem_freq=50, top_k=5, max_cache_size=None
+        )
+
+    model = model.to(cfg.device)
+
+    def generate(instruction):
+        if not instruction:
+            return
+        if prompter_module:
+            # pylint: disable=stop-iteration-return
+            prompt: str = next(
+                prompter_module().build_prompt(instruction=instruction.strip("\n"))
+            )
+        else:
+            prompt = instruction.strip()
+        batch = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
+
+        model.eval()
+        with torch.no_grad():
+            generation_config = GenerationConfig(
+                repetition_penalty=1.1,
+                max_new_tokens=1024,
+                temperature=0.9,
+                top_p=0.95,
+                top_k=40,
+                bos_token_id=tokenizer.bos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.pad_token_id,
+                do_sample=True,
+                use_cache=True,
+                return_dict_in_generate=True,
+                output_attentions=False,
+                output_hidden_states=False,
+                output_scores=False,
+            )
+            streamer = TextIteratorStreamer(tokenizer)
+            generation_kwargs = {
+                "inputs": batch["input_ids"].to(cfg.device),
+                "generation_config": generation_config,
+                "streamer": streamer,
+            }
+
+            thread = Thread(target=model.generate, kwargs=generation_kwargs)
+            thread.start()
+
+            all_text = ""
+
+            for new_text in streamer:
+                all_text += new_text
+                yield all_text
+
+    demo = gr.Interface(
+        fn=generate,
+        inputs="textbox",
+        outputs="text",
+        title=cfg.get("gradio_title", "Axolotl Gradio Interface"),
+    )
+    demo.queue().launch(show_api=False, share=True)
+
+
 def choose_config(path: Path):
     yaml_files = list(path.glob("*.yml"))
 
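The heart of `do_inference_gradio` is the streaming pattern: `model.generate` blocks until generation finishes, so it runs on a background `Thread` with a `TextIteratorStreamer` attached, while the enclosing `generate` function iterates the streamer and yields the accumulated text. Because the function handed to `gr.Interface` is a generator, Gradio re-renders the output on every yield, which is also why `demo.queue()` is called before `launch` (matching the "queuing and title" bullet in the commit message). Below is a minimal, self-contained sketch of the same pattern outside axolotl; the model name, generation settings, and `skip_prompt=True` are illustrative assumptions, not what the commit uses.

```python
# Stand-alone sketch of the Thread + TextIteratorStreamer + generator pattern.
# Model name and settings are assumptions for illustration only.
from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_NAME = "openlm-research/open_llama_3b"  # hypothetical choice

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()


def generate(prompt):
    if not prompt:
        return
    batch = tokenizer(prompt, return_tensors="pt").to(model.device)
    # skip_prompt=True drops the echoed input; the axolotl version keeps it.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    generation_kwargs = {
        "input_ids": batch["input_ids"],
        "max_new_tokens": 256,
        "streamer": streamer,
    }
    # generate() blocks until done, so it runs on a worker thread while the
    # main thread consumes decoded chunks from the streamer.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text  # Gradio re-renders the output on every yield


demo = gr.Interface(fn=generate, inputs="textbox", outputs="text", title="Streaming demo")
demo.queue().launch()  # queuing enables incremental updates from generator functions
```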
src/axolotl/cli/inference.py
CHANGED
@@ -6,11 +6,16 @@ from pathlib import Path
 import fire
 import transformers
 
-from axolotl.cli import do_inference, load_cfg, print_axolotl_text_art
+from axolotl.cli import (
+    do_inference,
+    do_inference_gradio,
+    load_cfg,
+    print_axolotl_text_art,
+)
 from axolotl.common.cli import TrainerCliArgs
 
 
-def do_cli(config: Path = Path("examples/"), **kwargs):
+def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
     # pylint: disable=duplicate-code
     print_axolotl_text_art()
     parsed_cfg = load_cfg(config, **kwargs)
@@ -21,7 +26,10 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
     )
     parsed_cli_args.inference = True
 
-    do_inference(cfg=parsed_cfg, cli_args=parsed_cli_args)
+    if gradio:
+        do_inference_gradio(cfg=parsed_cfg, cli_args=parsed_cli_args)
+    else:
+        do_inference(cfg=parsed_cfg, cli_args=parsed_cli_args)
 
 
 if __name__ == "__main__":
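One detail worth noting: `do_cli` is exposed through python-fire (the module imports `fire`, and the entry point presumably calls `fire.Fire(do_cli)` under the `if __name__ == "__main__":` guard, which sits outside the hunks shown). That is how the bare `--gradio` flag on the command line becomes the new `gradio=False` keyword argument. The sketch below illustrates fire's flag handling with a toy function; it is not axolotl's actual entry point.

```python
# Toy illustration of how python-fire maps CLI flags onto do_cli's signature.
# Names and behaviour inside the function are illustrative only.
import fire


def do_cli(config: str = "examples/", gradio: bool = False, **kwargs):
    # In axolotl, this would load the config and dispatch to
    # do_inference_gradio(...) or do_inference(...); here we just report.
    mode = "gradio UI" if gradio else "terminal streaming"
    print(f"config={config!r}, mode={mode}, overrides={kwargs}")


if __name__ == "__main__":
    # e.g. `python demo_cli.py examples/lora.yml --gradio --lora_model_dir=./lora-out`
    # fire maps the positional argument to `config`, the bare `--gradio` flag
    # to gradio=True, and remaining --key=value flags into **kwargs.
    fire.Fire(do_cli)
```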