khanhdhq committed
Commit c80c6bb
1 Parent(s): c282e83

Update app.py

Files changed (1)
  1. app.py +61 -83
app.py CHANGED
@@ -1,96 +1,74 @@
- #!/usr/bin/env python
- # coding: utf-8
-
- # In[ ]:
-
-
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
- import gradio as gr
  import re

- def cleaning_history_tuple(history):
-     s=sum(history,())
-     s=list(s)
-     s2=""
-     for i in s:
-         i=re.sub("\n", '', i)
-         i=re.sub("<p>", '', i)
-         i=re.sub("</p>", '', i)
-         s2=s2+i+'\n'
-     return s2

- def ai_output(string1,string2):
-     a1=len(string1)
-     a2=len(string2)
-     string3=string2[a1:]
-     sub1="A:"
-     sub2="User"
-     #sub3="\n"
-     try:
-         try:
-             idx1=string3.index(sub1)
-             response=string3[:idx1]
-             return response
-
-         except:
-             idx1=string3.index(sub2)
-             response=string3[:idx1]
-             return response
-     except:
-         return string3

- model4 = AutoModelForCausalLM.from_pretrained("bigscience/bloom-3b")
- tokenizer4 = AutoTokenizer.from_pretrained("bigscience/bloom-3b")

- def predict(input,initial_prompt, temperature=0.7,top_p=1,top_k=5,max_tokens=64,no_repeat_ngram_size=1,num_beams=6,do_sample=True, history=[]):
-
-     s = cleaning_history_tuple(history)
-
-     s = s+ "\n"+ "User: "+ input + "\n" + "Assistant: "
-     s2=initial_prompt+" " + s
-
-     input_ids = tokenizer4.encode(str(s2), return_tensors="pt")
-     response = model4.generate(input_ids, min_length = 10,
-                                max_new_tokens=int(max_tokens),
-                                top_k=int(top_k),
-                                top_p=float(top_p),
-                                temperature=float(temperature),
-                                no_repeat_ngram_size=int(no_repeat_ngram_size),
-                                num_beams = int(num_beams),
-                                do_sample = bool(do_sample),
-                                )
-
-
-     response2 = tokenizer4.decode(response[0])
-     print("Response after decoding tokenizer: ",response2)
-     print("\n\n")
-     response3=ai_output(s2,response2)

-     input="User: "+input
-     response3="Assistant: "+ response3
-     history.append((input, response3))

-     return history, history
-
- #gr.Interface(fn=predict,title="BLOOM-3b",
- #             inputs=["text","text","text","text","text","text","text","text","text",'state'],
- #
- #             outputs=["chatbot",'state']).launch()
-

- gr.Interface(inputs=[gr.Textbox(label="input", lines=1, value=""),
-                      gr.Textbox(label="initial_prompt", lines=1, value=prompt),
-                      gr.Textbox(label="temperature", lines=1, value=0.7),
-                      gr.Textbox(label="top_p", lines=1, value=1),
-                      gr.Textbox(label="top_k", lines=1, value=5),
-                      gr.Textbox(label="max_tokens", lines=1, value=64),
-                      gr.Textbox(label="no_repeat_ngram_size", lines=1, value=1),
-                      gr.Textbox(label="num_beams", lines=1, value=6),
-                      gr.Textbox(label="do_sample", lines=1, value="True"), 'state'],
-              fn=predict, title="OPT-6.7B", outputs=["chatbot",'state']

-              #inputs=["text","text","text","text","text","text","text","text","text",'state'],

-              ).launch()

  from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftConfig, PeftModel
  import torch
  import re

+ hf_repo = "khanhdhq/test_finetune_bloom_3b"
+ config = PeftConfig.from_pretrained(hf_repo)
+ finetuned_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
+ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+ # Load the Lora model
+ finetuned_model = PeftModel.from_pretrained(finetuned_model, hf_repo)


+ @torch.no_grad()
+ def infer(text):
+     if torch.cuda.is_available():
+         device = "cuda"
+     else:
+         device = "cpu"

+     try:
+         if torch.backends.mps.is_available():
+             device = "mps"
+     except: # noqa: E722
+         pass
+     inputs = tokenizer(text, add_special_tokens=True, return_tensors="pt").to(device)
+     outputs = finetuned_model.generate(**inputs, max_new_tokens=30)
+     response = tokenizer.decode(outputs[0])

+     response = response.split('<bot>:')[-1]
+     # print(response)
+     response = re.split(r'<human>:|\"codepoints\"', response, re.IGNORECASE)[0].strip()
+     def split_string(string):
+         pattern = r'[^a-zA-Z0-9\sđđăâàáảạãầấẩậẫằắẳặẵẻẹẽèéẻêệễểỉịĩìíỏọõôồốổộỗơờớởợỡủụũưừứửựữỷỵỹỳýỷỹỵĐđÀÁẢẠĂÃẤẦẤẨẬẪẰẮẲẶẴẺẸẼÈÉẺÊỆỄỂỈỊĨÌÍỎỌÕÔỒỐỔỘỖƠỜỚỞỢỠỦỤŨƯỪỨỬỰỮỶỴỸỲÝỶỸỴ\.\?,<>!:;\'\"\(\)\{\}\[\]]'
+         result = re.split(pattern, string, re.IGNORECASE)
+         return result[0].strip()
+     response = split_string(response)
+     return response

+ import gradio as gr

+ with gr.Blocks() as demo:
+     gr.Markdown(
+     """
+     # OmiCall chatbot
+     Chat với tôi nếu bạn có hứng thú với các sản phẩm của OmiCall.
+     """)
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat ở đây")
+     # while not msg.strip():
+     #     msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat ở đây")
+     clear = gr.Button("Xóa lịch sử chat")
+     def user(user_message, history):
+         return gr.update(value="", interactive=False), history + [[user_message, None]]

+     def bot(history):
+         messages = []
+         convs = history[-5:-1]
+         for h in history[-5:-1]:
+             messages.append(f'<human>: {h[0]}')
+             messages.append(f'<bot>: {h[1]}')
+         messages.append(f'<human>: {history[-1][0]} <bot>:')
+         mess = ' '.join(messages)
+         history[-1][1] = infer(mess)
+         return history

+     response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+         bot, chatbot, chatbot
+     )
+     response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
+     clear.click(lambda: None, None, chatbot, queue=False)

+ demo.queue()
+ demo.launch()
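
For context on the new app.py: it resolves the base model from the LoRA adapter repo khanhdhq/test_finetune_bloom_3b via peft, wraps it with PeftModel, and serves it through a Gradio Blocks chat UI whose Vietnamese labels mean roughly "chat ở đây" = "chat here", "Xóa lịch sử chat" = "clear chat history", and whose Markdown blurb reads "Chat with me if you are interested in OmiCall's products". The sketch below is not part of the commit; assuming the fine-tuned model expects the `<human>:` / `<bot>:` turn format used by bot() and infer(), it shows how a prompt is assembled from recent turns and how a reply is trimmed, using the hypothetical helpers build_prompt and extract_reply. It also passes the regex flag by keyword: the third positional argument of re.split is maxsplit, not flags, so the committed re.split(pattern, response, re.IGNORECASE) calls use the flag value as a split limit rather than enabling case-insensitive matching.

import re

# Hypothetical helpers (not part of the commit) mirroring bot() and infer() in app.py.

def build_prompt(history, user_message, max_turns=4):
    """Join the last few (user, bot) turns into the '<human>: ... <bot>: ...'
    format that bot() feeds to infer()."""
    messages = []
    for human, bot in history[-max_turns:]:
        messages.append(f"<human>: {human}")
        messages.append(f"<bot>: {bot}")
    messages.append(f"<human>: {user_message} <bot>:")
    return " ".join(messages)

def extract_reply(generated_text):
    """Keep the text after the last '<bot>:' marker and cut it off at the next
    '<human>:' marker, as infer() does; flags=re.IGNORECASE is passed by keyword."""
    reply = generated_text.split("<bot>:")[-1]
    reply = re.split(r"<human>:", reply, flags=re.IGNORECASE)[0]
    return reply.strip()

if __name__ == "__main__":
    history = [["Hello", "Hi, how can I help you?"]]
    prompt = build_prompt(history, "What does OmiCall offer?")
    # Pretend this string came back from finetuned_model.generate(...):
    fake_output = prompt + " OmiCall offers a cloud call-center service. <human>: ..."
    print(extract_reply(fake_output))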