|
import gradio as gr |
|
import re |
|
import torch |
|
from transformers import pipeline |
|
|
|
# Build the text-generation pipeline once at import time so that every
# request handled by the UI reuses the same loaded model.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
|
|
|
# Fixed opening of every prompt: the system turn (closed by </s>) followed by
# the header of the user turn. The user's text is appended right after it.
# Written as explicit one-line pieces (and without an f-prefix, since there is
# nothing to interpolate) so the exact characters sent to the model do not
# depend on how the source file happens to be laid out.
instruction = (
    "\n"
    "<|system|>\n"
    "You are a pirate chatbot who always responds with Arr!</s>\n"
    "<|user|>\n"
)
|
|
|
def infer(user_prompt):
    """Generate the chatbot's reply to ``user_prompt``.

    The prompt is the module-level ``instruction`` (system turn plus user
    header), the user's text closed with ``</s>``, and an opened
    ``<|assistant|>`` turn for the model to complete.

    Args:
        user_prompt: Text typed by the user.

    Returns:
        The generated reply with the prompt removed and surrounding
        whitespace stripped.
    """
    # End the prompt with the assistant header so the model answers directly
    # instead of having to emit the turn marker itself.
    prompt = f"{instruction.strip()}\n{user_prompt}</s>\n<|assistant|>\n"
    print(f"PROMPT: {prompt}")

    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    print(outputs)

    generated_text = outputs[0]["generated_text"]

    # The pipeline output is expected to echo the prompt before the
    # continuation. Cutting off that exact prefix is reliable even when the
    # user's own text contains chat markers such as "<|assistant|>".
    if generated_text.startswith(prompt):
        return generated_text[len(prompt):].strip()

    # Fallback for output that does not begin with the prompt verbatim: drop
    # everything from the system marker through the first assistant marker.
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    return re.sub(pattern, '', generated_text, flags=re.DOTALL).strip()
|
|
|
# Minimal web UI: one text box in, one text box out, wired to infer().
demo = gr.Interface(
    fn=infer,
    inputs=[gr.Textbox()],
    outputs=[gr.Textbox()],
)

# Enable request queuing, then start the server.
demo.queue().launch()