khanhdhq committed
Commit c80c6bb
1 Parent(s): c282e83

Update app.py

Files changed (1)
  1. app.py +61 -83
app.py CHANGED
@@ -1,96 +1,74 @@
- #!/usr/bin/env python
- # coding: utf-8
-
- # In[ ]:
-
-
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
- import gradio as gr
  import re

- def cleaning_history_tuple(history):
-     s=sum(history,())
-     s=list(s)
-     s2=""
-     for i in s:
-         i=re.sub("\n", '', i)
-         i=re.sub("<p>", '', i)
-         i=re.sub("</p>", '', i)
-         s2=s2+i+'\n'
-     return s2

- def ai_output(string1,string2):
-     a1=len(string1)
-     a2=len(string2)
-     string3=string2[a1:]
-     sub1="A:"
-     sub2="User"
-     #sub3="\n"
-     try:
-         try:
-             idx1=string3.index(sub1)
-             response=string3[:idx1]
-             return response
-
-         except:
-             idx1=string3.index(sub2)
-             response=string3[:idx1]
-             return response
-     except:
-         return string3

- model4 = AutoModelForCausalLM.from_pretrained("bigscience/bloom-3b")
- tokenizer4 = AutoTokenizer.from_pretrained("bigscience/bloom-3b")

- def predict(input,initial_prompt, temperature=0.7,top_p=1,top_k=5,max_tokens=64,no_repeat_ngram_size=1,num_beams=6,do_sample=True, history=[]):
-
-     s = cleaning_history_tuple(history)
-
-     s = s+ "\n"+ "User: "+ input + "\n" + "Assistant: "
-     s2=initial_prompt+" " + s
-
-     input_ids = tokenizer4.encode(str(s2), return_tensors="pt")
-     response = model4.generate(input_ids, min_length = 10,
-                                max_new_tokens=int(max_tokens),
-                                top_k=int(top_k),
-                                top_p=float(top_p),
-                                temperature=float(temperature),
-                                no_repeat_ngram_size=int(no_repeat_ngram_size),
-                                num_beams = int(num_beams),
-                                do_sample = bool(do_sample),
-                                )
-
-
-     response2 = tokenizer4.decode(response[0])
-     print("Response after decoding tokenizer: ",response2)
-     print("\n\n")
-     response3=ai_output(s2,response2)

-     input="User: "+input
-     response3="Assistant: "+ response3
-     history.append((input, response3))

-     return history, history
-
- #gr.Interface(fn=predict,title="BLOOM-3b",
- #             inputs=["text","text","text","text","text","text","text","text","text",'state'],
- #
- #             outputs=["chatbot",'state']).launch()
-

- gr.Interface(inputs=[gr.Textbox(label="input", lines=1, value=""),
-                      gr.Textbox(label="initial_prompt", lines=1, value=prompt),
-                      gr.Textbox(label="temperature", lines=1, value=0.7),
-                      gr.Textbox(label="top_p", lines=1, value=1),
-                      gr.Textbox(label="top_k", lines=1, value=5),
-                      gr.Textbox(label="max_tokens", lines=1, value=64),
-                      gr.Textbox(label="no_repeat_ngram_size", lines=1, value=1),
-                      gr.Textbox(label="num_beams", lines=1, value=6),
-                      gr.Textbox(label="do_sample", lines=1, value="True"), 'state'],
-              fn=predict, title="OPT-6.7B", outputs=["chatbot",'state']

-              #inputs=["text","text","text","text","text","text","text","text","text",'state'],

-              ).launch()

  from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftConfig, PeftModel
  import torch
  import re

+ hf_repo = "khanhdhq/test_finetune_bloom_3b"
+ config = PeftConfig.from_pretrained(hf_repo)
+ finetuned_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
+ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+ # Load the Lora model
+ finetuned_model = PeftModel.from_pretrained(finetuned_model, hf_repo)


+ @torch.no_grad()
+ def infer(text):
+     if torch.cuda.is_available():
+         device = "cuda"
+     else:
+         device = "cpu"

+     try:
+         if torch.backends.mps.is_available():
+             device = "mps"
+     except: # noqa: E722
+         pass
+     inputs = tokenizer(text, add_special_tokens=True, return_tensors="pt").to(device)
+     outputs = finetuned_model.generate(**inputs, max_new_tokens=30)
+     response = tokenizer.decode(outputs[0])

+     response = response.split('<bot>:')[-1]
+     # print(response)
+     response = re.split(r'<human>:|\"codepoints\"', response, re.IGNORECASE)[0].strip()
+     def split_string(string):
+         pattern = r'[^a-zA-Z0-9\sđđăâàáảạãầấẩậẫằắẳặẵẻẹẽèéẻêệễểỉịĩìíỏọõôồốổộỗơờớởợỡủụũưừứửựữỷỵỹỳýỷỹỵĐđÀÁẢẠĂÃẤẦẤẨẬẪẰẮẲẶẴẺẸẼÈÉẺÊỆỄỂỈỊĨÌÍỎỌÕÔỒỐỔỘỖƠỜỚỞỢỠỦỤŨƯỪỨỬỰỮỶỴỸỲÝỶỸỴ\.\?,<>!:;\'\"\(\)\{\}\[\]]'
+         result = re.split(pattern, string, re.IGNORECASE)
+         return result[0].strip()
+     response = split_string(response)
+     return response

+ import gradio as gr

+ with gr.Blocks() as demo:
+     gr.Markdown(
+     """
+     # OmiCall chatbot
+     Chat với tôi nếu bạn có hứng thú với các sản phẩm của OmiCall.
+     """)
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat ở đây")
+     # while not msg.strip():
+     #     msg = gr.Textbox(label="Chatbot OmiCall", placeholder="chat ở đây")
+     clear = gr.Button("Xóa lịch sử chat")
+     def user(user_message, history):
+         return gr.update(value="", interactive=False), history + [[user_message, None]]

+     def bot(history):
+         messages = []
+         convs = history[-5:-1]
+         for h in history[-5:-1]:
+             messages.append(f'<human>: {h[0]}')
+             messages.append(f'<bot>: {h[1]}')
+         messages.append(f'<human>: {history[-1][0]} <bot>:')
+         mess = ' '.join(messages)
+         history[-1][1] = infer(mess)
+         return history

+     response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+         bot, chatbot, chatbot
+     )
+     response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
+     clear.click(lambda: None, None, chatbot, queue=False)

+ demo.queue()
+ demo.launch()
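
For context on the new app.py: it resolves the base model from the LoRA adapter repo khanhdhq/test_finetune_bloom_3b via peft, wraps it with PeftModel, and serves it through a Gradio Blocks chat UI whose Vietnamese labels mean roughly "chat ở đây" = "chat here", "Xóa lịch sử chat" = "clear chat history", and whose Markdown blurb reads "Chat with me if you are interested in OmiCall's products". The sketch below is not part of the commit; assuming the fine-tuned model expects the `<human>:` / `<bot>:` turn format used by bot() and infer(), it shows how a prompt is assembled from recent turns and how a reply is trimmed, using the hypothetical helpers build_prompt and extract_reply. It also passes the regex flag by keyword: the third positional argument of re.split is maxsplit, not flags, so the committed re.split(pattern, response, re.IGNORECASE) calls use the flag value as a split limit rather than enabling case-insensitive matching.

import re

# Hypothetical helpers (not part of the commit) mirroring bot() and infer() in app.py.

def build_prompt(history, user_message, max_turns=4):
    """Join the last few (user, bot) turns into the '<human>: ... <bot>: ...'
    format that bot() feeds to infer()."""
    messages = []
    for human, bot in history[-max_turns:]:
        messages.append(f"<human>: {human}")
        messages.append(f"<bot>: {bot}")
    messages.append(f"<human>: {user_message} <bot>:")
    return " ".join(messages)

def extract_reply(generated_text):
    """Keep the text after the last '<bot>:' marker and cut it off at the next
    '<human>:' marker, as infer() does; flags=re.IGNORECASE is passed by keyword."""
    reply = generated_text.split("<bot>:")[-1]
    reply = re.split(r"<human>:", reply, flags=re.IGNORECASE)[0]
    return reply.strip()

if __name__ == "__main__":
    history = [["Hello", "Hi, how can I help you?"]]
    prompt = build_prompt(history, "What does OmiCall offer?")
    # Pretend this string came back from finetuned_model.generate(...):
    fake_output = prompt + " OmiCall offers a cloud call-center service. <human>: ..."
    print(extract_reply(fake_output))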