Upload app.py
app.py
CHANGED
@@ -7,11 +7,9 @@ import random
 import pandas as pd
 from sklearn.model_selection import train_test_split
 import time
-
 #from model import RNN_model
 from timeit import default_timer as timer
 from typing import Tuple, Dict
-
 ################################################################################
 import argparse
 import numpy as np
@@ -21,18 +19,13 @@ import copy
 from str2bool import str2bool
 from typing import Dict, Sequence
 from sentence_transformers import SentenceTransformer
-
 import torch
-import json
-
-import transformers
 from modeling_phi import PhiForCausalLM
 from tokenization_codegen import CodeGenTokenizer
+from transformers import PhiForCausalLM, AutoTokenizer, AutoModelForCausalLM
 ################################################################################
-
 parser = argparse.ArgumentParser()
 #############################################################################################################################
-
 parser.add_argument('--device_id', type=str, default="0")
 parser.add_argument('--model', type=str, default="microsoft/phi-2", help="") ## /phi-1.5
 parser.add_argument('--embedder', type=str, default="BAAI/bge-small-en-v1.5") ## /bge-small-en-v1.5 # bge-m3
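Note on this hunk: the added transformers import lists PhiForCausalLM even though the line above it still imports a PhiForCausalLM from the local modeling_phi module. Because the transformers import runs last, its class shadows the vendored one, so get_model() below ends up using the library implementation. A one-line alias would keep both visible; this is a hypothetical fix, not part of the commit:

    from modeling_phi import PhiForCausalLM as LocalPhiForCausalLM  # hypothetical rename to avoid shadowing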
@@ -51,7 +44,6 @@ if torch.cuda.is_available():
     print(f'################################################################# device: {device}#################################################################')
 else:
     device = "cpu"
-
 def get_model(base_model: str = "bigcode/starcoder",):
     tokenizer = CodeGenTokenizer.from_pretrained(base_model)
     tokenizer.pad_token_id = tokenizer.eos_token_id
@@ -59,14 +51,10 @@ def get_model(base_model: str = "bigcode/starcoder",):
 
     model = PhiForCausalLM.from_pretrained(
         base_model,
-        device_map="auto",
-    )
+        device_map="auto",)
     model.config.pad_token_id = tokenizer.pad_token_id
-
     model.eval()
-
     return tokenizer, model
-
 ################################################################################
 
 '''
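The collapsed device_map="auto",) is purely cosmetic; the call still passes the same keyword argument. For reference, a minimal sketch of calling get_model(); the call site is not shown in this diff, so the argument is an assumption based on the --model flag above:

    # Hypothetical call site (not in the diff); assumes args.model points at a Phi checkpoint.
    tokenizer, model = get_model(base_model=args.model)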
@@ -87,14 +75,14 @@ The bot will respond based on the best possible answers to your messages.
 """
 # Create the gradio demo
 with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""") as demo:
-
-
-
-
-
-
-
-
+    gr.HTML('<h1 align="center">Medical Chatbot: ARIN 7102')
+    #gr.HTML('<h3 align="center">To know more about this project')
+    with gr.Accordion("Follow these Steps to use the Gradio WebUI", open=True):
+        gr.HTML(howto)
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.ClearButton([msg, chatbot])
+    '''
     def respond(message, chat_history):
         # Create couple of if-else statements to capture/mimick peoples's Interaction
         embedder = SentenceTransformer(args.embedder, device=device)
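The added lines assume a howto string defined earlier in the file (outside this diff), and the trailing ''' opens a quote block that, together with the matching ''' in the next hunk, comments out the old respond. A self-contained sketch of the same Blocks wiring with a stub callback, so the layout can be tried without any model (the echo reply is an invented placeholder):

    import gradio as gr

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.ClearButton([msg, chatbot])

        def respond(message, chat_history):
            # placeholder reply instead of phi-2 generation
            chat_history.append((message, f"echo: {message}"))
            return "", chat_history

        msg.submit(respond, [msg, chatbot], [msg, chatbot])

    demo.launch()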
@@ -104,10 +92,37 @@ with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
         chat_history.append((message, bot_message))
         time.sleep(2)
         return "", chat_history
-
-
-
+    '''
+    def respond(message, chat_history, base_model = "/home/henry/Desktop/ARIN7102/phi-2", device=device): # "meta-llama/Meta-Llama-3-70B"
+        # define the model and tokenizer.
+        # model = PhiForCausalLM.from_pretrained(base_model)
+        model = AutoModelForCausalLM.from_pretrained(base_model)
+        tokenizer = AutoTokenizer.from_pretrained(base_model)
+
+        # feel free to change the prompt to your liking.
+        #prompt = f"Patient: coercive spondylitis, pain in the lumbosacral area when turning over during sleep at night, no pain in any other part of the body.
+        #/n Doctor: It shouldn't be a problem, but it's better to upload the images. /n Patient: {message} /n Doctor:"
+        output_termination = "\nOutput:"
+        prompt = f"Instruct: {message}{output_termination}"
+        # apply the tokenizer.
+        tokens = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
+        #tokens = tokens.to(device)
+        #eos_token_id = tokenizer.eos_token_id
+        # use the model to generate new tokens.
+        generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=500, eos_token_id=50256, pad_token_id=50256)
+
+        # Find the position of "Output:" and extract the text after it
+        generated_text = tokenizer.batch_decode(generated_output)[0]
+        # Split the text at "Output:" and take the second part
+        split_text = generated_text.split("Output:", 1)
+        bot_message = split_text[1].strip() if len(split_text) > 1 else ""
+        bot_message = bot_message.replace("<|endoftext|>", "").strip()
+        chat_history.append((message, bot_message))
+        time.sleep(2)
+        return "", chat_history
+        #return bot_message
+
+    msg.submit(respond, [msg, chatbot], [msg, chatbot])
 
 # Launch the demo
-    demo.launch()
-
+demo.launch()
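Two details worth flagging in the new respond: the default base_model is a local path (/home/henry/Desktop/ARIN7102/phi-2) that only resolves if that checkpoint is uploaded alongside the Space, and the device parameter is accepted but never used, so generation runs wherever from_pretrained placed the model. The answer extraction is plain string surgery; a standalone sketch with an invented generation string:

    # Same split-and-strip logic as respond(); the sample text is made up.
    generated_text = "Instruct: What causes lower back pain?\nOutput: Often muscle strain.<|endoftext|>"
    split_text = generated_text.split("Output:", 1)
    bot_message = split_text[1].strip() if len(split_text) > 1 else ""
    bot_message = bot_message.replace("<|endoftext|>", "").strip()
    print(bot_message)  # -> Often muscle strain.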