# Import and class names setup
import argparse
import copy
import os
import pprint
import random
import time
from timeit import default_timer as timer
from typing import Dict, Sequence, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from str2bool import str2bool
#import nltk_u
#from model import RNN_model
#from modeling_phi import PhiForCausalLM  # local copy; shadowed by the transformers import below, so kept disabled
from tokenization_codegen import CodeGenTokenizer  # local tokenizer module shipped with the repo
from transformers import PhiForCausalLM, AutoTokenizer, AutoModelForCausalLM
################################################################################
parser = argparse.ArgumentParser()
################################################################################
parser.add_argument('--device_id', type=str, default="0")
parser.add_argument('--model', type=str, default="microsoft/phi-2", help="")  ## /phi-1.5
parser.add_argument('--embedder', type=str, default="BAAI/bge-small-en-v1.5")  ## /bge-small-en-v1.5 # bge-m3
parser.add_argument('--output_path', type=str, default="/home/henry/Desktop/HKU-DASC7606-A2/Outputs/ARC-Challenge-test", help="")  ## -bge-m3
parser.add_argument('--start_index', type=int, default=0, help="")
parser.add_argument('--end_index', type=int, default=9999, help="")
parser.add_argument('--N', type=int, default=8, help="")
parser.add_argument('--max_len', type=int, default=1024, help="")
parser.add_argument('--prompt_type', type=str, default="v2.0", help="")
parser.add_argument('--top_k', type=str2bool, default=True, help="")
################################################################################
args = parser.parse_args()
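# Example invocation (script name assumed; flags as defined above):
#   python app.py --device_id 0 --model microsoft/phi-2 --embedder BAAI/bge-small-en-v1.5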
if torch.cuda.is_available():
    device = "cuda"
    print(f'################################ device: {device} ################################')
else:
    device = "cpu"
def get_model(base_model: str = "bigcode/starcoder"):
    tokenizer = CodeGenTokenizer.from_pretrained(base_model)
    # CodeGen-style tokenizers ship without a pad token, so reuse EOS for padding.
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.pad_token = tokenizer.eos_token
    model = PhiForCausalLM.from_pretrained(
        base_model,
        device_map="auto",
    )
    model.config.pad_token_id = tokenizer.pad_token_id
    model.eval()
    return tokenizer, model
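# Example usage (mirrors the call in the disabled respond() variant further below):
#   tokenizer, model = get_model(base_model=args.model)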
################################################################################
'''
# Import data
df = pd.read_csv('Symptom2Disease.csv')
df.drop('Unnamed: 0', axis=1, inplace=True)

# Preprocess data
df.drop_duplicates(inplace=True)
train_data, test_data = train_test_split(df, test_size=0.15, random_state=42)
'''
howto= """Welcome to the <b>Medical Chatbot</b>, powered by Gradio. | |
Currently, the chatbot can WELCOME YOU, PREDICT DISEASE based on your symptoms and SUGGEST POSSIBLE SOLUTIONS AND RECOMENDATIONS, and BID YOU FAREWELL. | |
<b>How to Start:</b> Simply type your messages in the textbox to chat with the Chatbot and press enter!<br><br> | |
The bot will respond based on the best possible answers to your messages.""" | |
# Create the gradio demo
with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""") as demo:
    gr.HTML('<h1 align="center">Medical Chatbot: ARIN 7102</h1>')
    #gr.HTML('<h3 align="center">To know more about this project</h3>')
    with gr.Accordion("Follow these Steps to use the Gradio WebUI", open=True):
        gr.HTML(howto)
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])
    '''
    def respond(message, chat_history):
        # Embedding-based variant, currently disabled.
        embedder = SentenceTransformer(args.embedder, device=device)
        tokenizer, model = get_model(base_model=args.model)
        message_embeddings = embedder.encode(message)
        bot_message = model(message_embeddings)
        chat_history.append((message, bot_message))
        time.sleep(2)
        return "", chat_history
    '''
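    # Active handler: non-phi-2 base models get rule-based greeting/goodbye
    # replies or a symptom-classifier prediction; "microsoft/phi-2" (the default)
    # answers free-form via text generation.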
    def respond(message, chat_history, base_model="microsoft/phi-2", device=device):  # "meta-llama/Meta-Llama-3-70B"
        if base_model != "microsoft/phi-2":
            # Random greetings in list format
            greetings = [
                "hello!", 'hello', 'hii !', 'hi', "hi there!", "heyy",
                'good morning', 'good afternoon', 'good evening',
                "hey", "how are you", "how are you?", "how is it going", "how is it going?",
                "what's up?",
                "hey, how are you?", "what is popping",
                "good to see you!", "howdy!",
                "hi, nice to meet you.", "hiya!",
                "hi, what's new?",
                "hey, how's your day?", "hi, how have you been?", "greetings",
            ]
            # Random greeting responses
            responses = [
                "Thank you for using our medical chatbot. Please provide the symptoms you're experiencing, and I'll do my best to predict the possible disease.",
                "Hello! I'm here to help you with medical predictions based on your symptoms. Please describe your symptoms in as much detail as possible.",
                "Greetings! I am a specialized medical chatbot trained to predict potential diseases based on the symptoms you provide. Kindly list your symptoms explicitly.",
                "Welcome to the medical chatbot. To assist you accurately, please share your symptoms in explicit detail.",
                "Hi there! I'm a medical chatbot specialized in analyzing symptoms to suggest possible diseases. Please provide your symptoms explicitly.",
                "Hey! I'm your medical chatbot. Describe your symptoms with as much detail as you can, and I'll generate potential disease predictions.",
                "How can I assist you today? I'm a medical chatbot trained to predict diseases based on symptoms. Please be explicit while describing your symptoms.",
                "Hello! I'm a medical chatbot capable of predicting diseases based on the symptoms you provide. Your explicit symptom description will help me assist you better.",
                "Greetings! I'm here to help with medical predictions. Describe your symptoms explicitly, and I'll offer insights into potential diseases.",
                "Hi, I'm the medical chatbot. I've been trained to predict diseases from symptoms. The more explicit you are about your symptoms, the better I can assist you.",
                "Hi, I specialize in medical predictions based on symptoms. Kindly provide detailed symptoms for accurate disease predictions.",
                "Hello! I'm a medical chatbot with expertise in predicting diseases from symptoms. Please describe your symptoms explicitly to receive accurate insights.",
            ]
            # Random goodbyes
            goodbyes = [
                "farewell!", 'bye', 'goodbye', 'good-bye', 'good bye', 'thank you', 'later', "take care!",
                "see you later!", 'see you', 'see ya', 'see-you', 'thanks', 'thank', 'bye bye', 'byebye',
                "catch you on the flip side!", "adios!",
                "goodbye for now!", "till we meet again!",
                "so long!", "hasta la vista!",
                "bye-bye!", "keep in touch!",
                "toodles!", "ciao!",
                "later, gator!", "stay safe and goodbye!",
                "peace out!", "until next time!", "off I go!",
            ]
            # Random goodbye responses
            goodbye_replies = [
                "Take care of yourself! If you have more questions, don't hesitate to reach out.",
                "Stay well! Remember, I'm here if you need further medical advice.",
                "Goodbye for now! Don't hesitate to return if you need more information in the future.",
                "Wishing you good health ahead! Feel free to come back if you have more concerns.",
                "Farewell! If you have more symptoms or questions, don't hesitate to consult again.",
                "Take care and stay informed about your health. Feel free to chat anytime.",
                "Bye for now! Remember, your well-being is a priority. Don't hesitate to ask if needed.",
                "Have a great day ahead! If you need medical guidance later on, I'll be here.",
                "Stay well and take it easy! Reach out if you need more medical insights.",
                "Until next time! Prioritize your health and reach out if you need assistance.",
                "Goodbye! Your health matters. Feel free to return if you have more health-related queries.",
                "Stay healthy and stay curious about your health! If you need more info, just ask.",
                "Wishing you wellness on your journey! If you have more questions, I'm here to help.",
                "Take care and remember, your health is important. Don't hesitate to reach out if needed.",
                "Goodbye for now! Stay informed and feel free to consult if you require medical advice.",
                "Stay well and stay proactive about your health! If you have more queries, feel free to ask.",
                "Farewell! Remember, I'm here whenever you need reliable medical information.",
                "Bye for now! Stay vigilant about your health and don't hesitate to return if necessary.",
                "Take care and keep your well-being a priority! Reach out if you have more health questions.",
                "Wishing you good health ahead! Don't hesitate to chat if you need medical insights.",
                "Goodbye! Stay well and remember, I'm here to assist you with medical queries.",
            ]
            # A couple of if-else statements to capture/mimic people's interaction
            if message.lower() in greetings:
                bot_message = random.choice(responses)
            elif message.lower() in goodbyes:
                bot_message = random.choice(goodbye_replies)
            else:
                # preprocess() and get_prediction() are assumed to be defined elsewhere
                # (e.g. by the disabled symptom-classifier pipeline above).
                inputs = preprocess(message)
                bot_message = f"Based on your symptoms, I believe you may have {get_prediction(inputs)}."
        else:
            # Define the model and tokenizer.
            # model = PhiForCausalLM.from_pretrained(base_model)
            model = AutoModelForCausalLM.from_pretrained(base_model)
            tokenizer = AutoTokenizer.from_pretrained(base_model)
            # Feel free to change the prompt to your liking.
            #prompt = f"Patient: coercive spondylitis, pain in the lumbosacral area when turning over during sleep at night, no pain in any other part of the body. \n Doctor: It shouldn't be a problem, but it's better to upload the images. \n Patient: {message} \n Doctor:"
            output_termination = "\nOutput:"
            prompt = f"Instruct: {message}{output_termination}"
            # Apply the tokenizer.
            tokens = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
            #tokens = tokens.to(device)
            #eos_token_id = tokenizer.eos_token_id
            # Use the model to generate new tokens; 50256 is "<|endoftext|>" in phi-2's GPT-2-style vocabulary.
            generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=100, eos_token_id=50256, pad_token_id=50256)
            generated_text = tokenizer.batch_decode(generated_output)[0]
            # Split the decoded text at "Output:" and keep everything after it.
            split_text = generated_text.split("Output:", 1)
            bot_message = split_text[1].strip() if len(split_text) > 1 else ""
            bot_message = bot_message.replace("<|endoftext|>", "").strip()
        chat_history.append((message, bot_message))
        time.sleep(2)
        return "", chat_history
        #return bot_message
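    # Wire the textbox to respond(); returning ("", chat_history) clears the
    # input box and pushes the updated history back into the Chatbot component.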
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Launch the demo
demo.launch()