Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,96 +1,74 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[ ]:
-
-
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-import gradio as gr
 import re

-
-
-
-
-
-
-        i=re.sub("<p>", '', i)
-        i=re.sub("</p>", '', i)
-        s2=s2+i+'\n'
-    return s2

-def ai_output(string1,string2):
-    a1=len(string1)
-    a2=len(string2)
-    string3=string2[a1:]
-    sub1="A:"
-    sub2="User"
-    #sub3="\n"
-    try:
-        try:
-            idx1=string3.index(sub1)
-            response=string3[:idx1]
-            return response
-
-        except:
-            idx1=string3.index(sub2)
-            response=string3[:idx1]
-            return response
-    except:
-        return string3

-
-

-
-
-
-
-
-
-
-
-    response = model4.generate(input_ids, min_length = 10,
-                               max_new_tokens=int(max_tokens),
-                               top_k=int(top_k),
-                               top_p=float(top_p),
-                               temperature=float(temperature),
-                               no_repeat_ngram_size=int(no_repeat_ngram_size),
-                               num_beams = int(num_beams),
-                               do_sample = bool(do_sample),
-                               )
-
-
-    response2 = tokenizer4.decode(response[0])
-    print("Response after decoding tokenizer: ",response2)
-    print("\n\n")
-    response3=ai_output(s2,response2)

-
-
-

-
-
-#gr.Interface(fn=predict,title="BLOOM-3b",
-#              inputs=["text","text","text","text","text","text","text","text","text",'state'],
-#
-#              outputs=["chatbot",'state']).launch()
-

-gr.
-
-
-
-
-
-
-
-
-

-

-
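Note on the change: the removed version exposed the decoding parameters (max_tokens, top_k, top_p, temperature, no_repeat_ngram_size, num_beams, do_sample) to the caller of model4.generate, while the rewritten app below hard-codes max_new_tokens=30. If tunable decoding is still wanted, the same keyword arguments are accepted by the generate call inside the new infer(); a rough sketch, with illustrative values that are not part of this commit:

    gen_kwargs = dict(do_sample=True, top_k=50, top_p=0.9, temperature=0.7,
                      no_repeat_ngram_size=3, max_new_tokens=60)
    outputs = finetuned_model.generate(**inputs, **gen_kwargs)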
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftConfig, PeftModel
 import torch
 import re

+hf_repo = "khanhdhq/test_finetune_bloom_3b"
+config = PeftConfig.from_pretrained(hf_repo)
+finetuned_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
+tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+# Load the LoRA model
+finetuned_model = PeftModel.from_pretrained(finetuned_model, hf_repo)
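Note: load_in_8bit=True relies on bitsandbytes, which expects a CUDA GPU, so on CPU-only Space hardware the model load fails at startup; that is one plausible cause of the "Runtime error" status shown above. A minimal CPU-only fallback sketch (an assumption, not part of this commit; bloom-3b in full precision needs roughly 12 GB of RAM):

    # Load the base model on CPU without 8-bit quantization, then attach the adapter.
    base = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        low_cpu_mem_usage=True,
    )
    finetuned_model = PeftModel.from_pretrained(base, hf_repo)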

+@torch.no_grad()
+def infer(text):
+    # Pick the best available device: CUDA, else Apple MPS, else CPU.
+    if torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"

+    try:
+        if torch.backends.mps.is_available():
+            device = "mps"
+    except:  # noqa: E722
+        pass
+    inputs = tokenizer(text, add_special_tokens=True, return_tensors="pt").to(device)
+    outputs = finetuned_model.generate(**inputs, max_new_tokens=30)
+    response = tokenizer.decode(outputs[0])

+    # Keep only the text generated after the last '<bot>:' marker.
+    response = response.split('<bot>:')[-1]
+    # print(response)
+    # Cut the reply off at the next '<human>:' turn (flags must be passed by keyword:
+    # as the third positional argument, re.IGNORECASE would be read as maxsplit).
+    response = re.split(r'<human>:|\"codepoints\"', response, flags=re.IGNORECASE)[0].strip()
+    def split_string(string):
+        # Truncate at the first character outside Vietnamese letters, digits, whitespace and common punctuation.
+        pattern = r'[^a-zA-Z0-9\sđđăâàáảạãầấẩậẫằắẳặẵẻẹẽèéẻêệễểỉịĩìíỏọõôồốổộỗơờớởợỡủụũưừứửựữỷỵỹỳýỷỹỵĐđÀÁẢẠĂÃẤẦẤẨẬẪẰẮẲẶẴẺẸẼÈÉẺÊỆỄỂỈỊĨÌÍỎỌÕÔỒỐỔỘỖƠỜỚỞỢỠỦỤŨƯỪỨỬỰỮỶỴỸỲÝỶỸỴ\.\?,<>!:;\'\"\(\)\{\}\[\]]'
+        result = re.split(pattern, string, flags=re.IGNORECASE)
+        return result[0].strip()
+    response = split_string(response)
+    return response
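For reference, infer() expects the conversation already flattened into the '<human>: ... <bot>:' format that bot() builds below; a small usage sketch (the question text is made up for illustration):

    prompt = "<human>: OmiCall là gì? <bot>:"  # "What is OmiCall?"
    print(infer(prompt))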

+import gradio as gr

+with gr.Blocks() as demo:
+    # The greeting below is Vietnamese: "Chat with me if you are interested in OmiCall's products."
+    gr.Markdown(
+    """
+    # OmiCall chatbot
+    Chat với tôi nếu bạn có hứng thú với các sản phẩm của OmiCall.
+    """)
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat ở đây")  # placeholder: "chat here"
+    # while not msg.strip():
+    #     msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat ở đây")
+    clear = gr.Button("Xóa lịch sử chat")  # "Clear chat history"
+    def user(user_message, history):
+        # Clear and lock the textbox while the bot answers, and append the new user turn.
+        return gr.update(value="", interactive=False), history + [[user_message, None]]

+    def bot(history):
+        # Rebuild the prompt from the last few turns in the '<human>: ... <bot>:' format used by infer().
+        messages = []
+        for h in history[-5:-1]:
+            messages.append(f'<human>: {h[0]}')
+            messages.append(f'<bot>: {h[1]}')
+        messages.append(f'<human>: {history[-1][0]} <bot>:')
+        mess = ' '.join(messages)
+        history[-1][1] = infer(mess)
+        return history

+    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
+    clear.click(lambda: None, None, chatbot, queue=False)

+demo.queue()
+demo.launch()
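The event wiring follows the usual Gradio Blocks chat pattern: msg.submit first runs user (clear and lock the textbox, append the new turn), .then runs bot to fill in the reply, and the final .then unlocks the textbox; clear.click resets the Chatbot. A stripped-down sketch of the same wiring with a stub bot in place of the model (assumes Gradio 3.x; purely illustrative, not part of this commit):

    import gradio as gr

    def user(user_message, history):
        # Clear and lock the textbox, append the new user turn with an empty reply.
        return gr.update(value="", interactive=False), history + [[user_message, None]]

    def bot(history):
        history[-1][1] = "echo: " + history[-1][0]  # stand-in for infer()
        return history

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.Button("Clear")
        chain = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
        chain.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch()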