from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel
import torch
import re
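
# Requires transformers, peft, torch, and gradio;
# load_in_8bit=True below additionally relies on bitsandbytes.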

hf_repo = "khanhdhq/test_finetune_bloom_3b"
config = PeftConfig.from_pretrained(hf_repo)
finetuned_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Attach the fine-tuned LoRA adapter to the 8-bit base model
finetuned_model = PeftModel.from_pretrained(finetuned_model, hf_repo)
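
# Inference-only use is assumed here (this line is an addition, not in the
# original): eval() disables dropout and is standard torch.nn.Module API,
# which PeftModel forwards to the wrapped model.
finetuned_model.eval()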


def infer(text):
    # Prefer CUDA, fall back to CPU; the MPS probe is wrapped in try/except
    # because older torch builds do not expose torch.backends.mps.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    try:
        if torch.backends.mps.is_available():
            device = "mps"
    except AttributeError:
        pass
    inputs = tokenizer(text, add_special_tokens=True, return_tensors="pt").to(device)
    outputs = finetuned_model.generate(**inputs, max_new_tokens=30)
    response = tokenizer.decode(outputs[0])
    # Keep only the bot's reply: take the text after the last '<bot>:' tag,
    # then cut it off at the next '<human>:' turn if generation ran on.
    # (flags must be passed by keyword: re.split's third positional
    # argument is maxsplit, not flags.)
    response = response.split('<bot>:')[-1]
    response = re.split(r'<human>:|"codepoints"', response, flags=re.IGNORECASE)[0].strip()

    def split_string(string):
        # Truncate at the first character outside the allowed set:
        # alphanumerics, whitespace, Vietnamese letters, common punctuation.
        pattern = r'[^a-zA-Z0-9\sđđăâàáảạãầấẩậẫằắẳặẵẻẹẽèéẻêệễểỉịĩìíỏọõôồốổộỗơờớởợỡủụũưừứửựữỷỵỹỳýỷỹỵĐđÀÁẢẠĂÃẤẦẤẨẬẪẰẮẲẶẴẺẸẼÈÉẺÊỆỄỂỈỊĨÌÍỎỌÕÔỒỐỔỘỖƠỜỚỞỢỠỦỤŨƯỪỨỬỰỮỶỴỸỲÝỶỸỴ\.\?,<>!:;\'\"\(\)\{\}\[\]]'
        result = re.split(pattern, string, flags=re.IGNORECASE)
        return result[0].strip()

    response = split_string(response)
    return response
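
# A minimal smoke test, assuming the '<human>:'/'<bot>:' prompt format used
# during fine-tuning; the prompt text is illustrative. Left commented out so
# it does not run generation at import time on Spaces.
# print(infer('<human>: Xin chào <bot>:'))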


import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown(
        """
        # OmiCall chatbot
        Chat with me if you are interested in OmiCall's products.
        """)
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat here")
    clear = gr.Button("Clear chat history")

    def user(user_message, history):
        # Clear and lock the textbox while the bot responds; append the new
        # user turn with a placeholder reply.
        return gr.update(value="", interactive=False), history + [[user_message, None]]

    def bot(history):
        # Rebuild the prompt from up to the four most recent completed turns,
        # then append the pending user message and generate the reply.
        messages = []
        for h in history[-5:-1]:
            messages.append(f'<human>: {h[0]}')
            messages.append(f'<bot>: {h[1]}')
        messages.append(f'<human>: {history[-1][0]} <bot>:')
        mess = ' '.join(messages)
        history[-1][1] = infer(mess)
        return history
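
    # For reference, two completed turns plus the pending one yield a prompt
    # shaped like (turn text illustrative):
    # "<human>: q1 <bot>: a1 <human>: q2 <bot>: a2 <human>: q3 <bot>:"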

    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()