Spaces:
Runtime error
Runtime error
from gradio_client import Client | |
import numpy as np | |
import gradio as gr | |
import requests | |
import json | |
import dotenv | |
import soundfile as sf | |
import time | |
import textwrap | |
from PIL import Image | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import torch | |
import os | |
import uuid | |
import optimum | |
welcome_message = """ | |
# 👋🏻Welcome to ⚕🗣️😷TruEra - MultiMed ⚕🗣️😷 | |
🗣️📝 This is an accessible and multimodal tool optimized using TruEra! We evaluated several configurations, prompts, and models to optimize this application. | |
### How To Use ⚕🗣️😷TruEra - MultiMed⚕: | |
🗣️📝Interact with ⚕🗣️😷TruEra - MultiMed⚕ in any language using image, audio or text. ⚕🗣️😷TruEra - MultiMed is an accessible application 📚🌟💼 that uses [Qwen/Qwen-1_8B-Chat](https://huggingface.co/Qwen/Qwen-1_8B-Chat) and [Tonic1/Official-Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat) with [Vectara](https://huggingface.co/vectara) embeddings + retrieval w/ [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/hf-seamless-m4t-large) for audio translation & accessibility. | |
do [get in touch](https://discord.gg/GWpVpekp). You can also use 😷TruEra MultiMed⚕️ on your own data & in your own way by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/TeamTonic/MultiMed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3> | |
### Join us : | |
🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)" | |
""" | |
# Display name -> three-letter language code. The keys populate the language
# dropdown in the UI; insertion order is the order shown to the user.
languages = {
    "English": "eng",
    "Modern Standard Arabic": "arb",
    "Bengali": "ben",
    "Catalan": "cat",
    "Czech": "ces",
    "Mandarin Chinese": "cmn",
    "Welsh": "cym",
    "Danish": "dan",
    "German": "deu",
    "Estonian": "est",
    "Finnish": "fin",
    "French": "fra",
    "Hindi": "hin",
    "Indonesian": "ind",
    "Italian": "ita",
    "Japanese": "jpn",
    "Korean": "kor",
    "Maltese": "mlt",
    "Dutch": "nld",
    "Western Persian": "pes",
    "Polish": "pol",
    "Portuguese": "por",
    "Romanian": "ron",
    "Russian": "rus",
    "Slovak": "slk",
    "Spanish": "spa",
    "Swedish": "swe",
    "Swahili": "swh",
    "Telugu": "tel",
    "Tagalog": "tgl",
    "Thai": "tha",
    "Turkish": "tur",
    "Ukrainian": "ukr",
    "Urdu": "urd",
    "Northern Uzbek": "uzn",
    "Vietnamese": "vie",
}
# Registry intended for UI component references (nothing in this file fills it).
components = {}

# Load settings from a local .env file into the process environment.
dotenv.load_dotenv()

# Remote SeamlessM4T Space used for speech-to-text and text-to-speech calls.
# NOTE(review): the URL pins one specific replica ("--replicas/2bmbx/"); such
# addresses look ephemeral -- confirm it is still reachable before relying on it.
seamless_client = Client("https://facebook-seamless-m4t-v2-large.hf.space/--replicas/2bmbx/")  # TruEra

# Hugging Face API token; both spellings stay available as module-level names.
hf_token = os.getenv("HuggingFace_Token")
HuggingFace_Token = hf_token

# Prefer the GPU when torch can see one.
device = "cuda" if torch.cuda.is_available() else "cpu"

image_description = ""
def check_hallucination(assertion, citation):
    """Score *assertion* against *citation* with the hosted hallucination model.

    The two texts are joined as ``"<assertion> [SEP] <citation>"`` and posted to
    the Hugging Face inference endpoint. The first score in the reply is
    returned embedded in a short markdown string.
    """
    print("Entering check_hallucination function")
    endpoint = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
    reply = requests.post(
        endpoint,
        headers={"Authorization": f"Bearer {hf_token}"},
        json={"inputs": f"{assertion} [SEP] {citation}"},
        timeout=120,
    )
    # The reply is indexed as a nested list: first item, first label, "score".
    score = reply.json()[0][0]["score"]
    print(f"check_hallucination output: {score}")
    return f"**hallucination score:** {score}"
# Endpoint and auth header for the hosted Vectara hallucination-evaluation model.
vapi_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
headers = {"Authorization": f"Bearer {hf_token}"}


def query(payload):
    """POST *payload* to the hallucination endpoint and return the decoded JSON.

    Args:
        payload: JSON-serialisable request body (callers send ``{"inputs": ...}``).

    Returns:
        Whatever JSON document the endpoint answered with.
    """
    print("Entering query function")
    # Bound the wait so a stalled endpoint cannot block the request forever.
    response = requests.post(vapi_url, headers=headers, json=payload, timeout=120)
    # Decode once and reuse the result for both the log line and the return value.
    result = response.json()
    print(f"API response: {result}")
    return result
def evaluate_hallucination(input1, input2):
    """Return a traffic-light risk label for *input1* judged against *input2*.

    The texts are joined with ``[SEP]`` and sent through ``query``. A score
    below 0.5 is labelled high risk; anything else is labelled low risk.
    """
    print("Entering evaluate_hallucination function")
    reply = query({"inputs": f"{input1}[SEP]{input2}"})
    score = reply[0][0]['score']
    label = (
        f"🔴 High risk. Score: {score:.2f}"
        if score < 0.5
        else f"🟢 Low risk. Score: {score:.2f}"
    )
    print(f"evaluate_hallucination label: {label}")
    return label
def save_audio(audio_input, output_dir="saved_audio"):
    """Write a ``(sample_rate, samples)`` pair to a WAV file and return its path.

    Args:
        audio_input: Two-item sequence unpacked as ``(sample_rate, audio_data)``.
        output_dir: Directory to write into; created when missing.

    Returns:
        Path of the file that was written.
    """
    # exist_ok avoids the check-then-create race when two requests arrive together.
    os.makedirs(output_dir, exist_ok=True)
    sample_rate, audio_data = audio_input
    # A timestamp alone repeats within the same second; the random suffix keeps
    # simultaneous uploads from overwriting each other.
    file_name = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
    file_path = os.path.join(output_dir, file_name)
    sf.write(file_path, audio_data, sample_rate)
    return file_path
def save_image(image_input, output_dir="saved_images"):
    """Save a NumPy image array as a PNG file and return its path.

    Args:
        image_input: Image data; must be a ``numpy.ndarray``.
        output_dir: Directory to write into; created when missing.

    Returns:
        Path of the PNG that was written.

    Raises:
        ValueError: If *image_input* is not a ``numpy.ndarray``.
    """
    print("Entering save_image function")
    # Validate first so a rejected input leaves nothing behind on disk.
    if not isinstance(image_input, np.ndarray):
        raise ValueError("Invalid image input type")
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_dir, exist_ok=True)
    image = Image.fromarray(image_input)
    # The random suffix keeps uploads that land in the same second from colliding.
    file_name = f"image_{int(time.time())}_{uuid.uuid4().hex[:8]}.png"
    file_path = os.path.join(output_dir, file_name)
    image.save(file_path)
    print(f"Image saved at: {file_path}")
    return file_path
def process_image(image_file_path):
    """Ask the remote Qwen-VL Space to describe the image at *image_file_path*.

    Returns:
        The value returned by the Space's prediction call, or a string starting
        with ``"Error occurred during image processing:"`` when connecting or
        predicting fails.
    """
    print("Entering process_image function")
    try:
        # Connecting can fail just like predicting, so it lives inside the same
        # guard and is reported with the same error string.
        client = Client("https://tonic1-official-qwen-vl-chat.hf.space/--replicas/4t5dh/")  # TruEra
        result = client.predict(
            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",
            image_file_path,
            fn_index=0,
        )
        print(f"Image processing result: {result}")
        return result
    except Exception as e:
        print(f"Error in process_image: {e}")
        return f"Error occurred during image processing: {e}"
def process_speech(audio_input, source_language, target_language="English"):
    """Send audio to the Seamless ``/s2tt`` endpoint and return its result.

    Returns a fixed notice when *audio_input* is ``None`` and an error string
    when the remote call fails.
    """
    print("Entering process_speech function")
    if audio_input is None:
        return "No audio input provided."
    try:
        transcript = seamless_client.predict(
            audio_input,
            source_language,
            target_language,
            api_name="/s2tt",
        )
        print(f"Speech processing result: {transcript}")
        return transcript
    except Exception as err:
        reason = str(err)
        print(f"Error in process_speech: {reason}")
        return f"Error in speech processing: {reason}"
def convert_text_to_speech(input_text, source_language, target_language):
    """Call the Seamless ``/t2st`` endpoint and return ``(audio_path, text)``.

    The first two items of the remote result are returned as the audio file
    path and the translated text. An empty result yields ``(None, "")``; any
    failure yields ``(None, <error message>)``.
    """
    print("Entering convert_text_to_speech function")
    try:
        result = seamless_client.predict(
            input_text,
            source_language,
            target_language,
            api_name="/t2st",
        )
        if result:
            audio_file_path, translated_text = result[0], result[1]
        else:
            audio_file_path, translated_text = None, ""
        print(f"Text-to-speech conversion result: Audio file path: {audio_file_path}, Translated text: {translated_text}")
        return audio_file_path, translated_text
    except Exception as err:
        reason = str(err)
        print(f"Error in convert_text_to_speech: {reason}")
        return None, f"Error in text-to-speech conversion: {reason}"
def _build_vectara_request(user_message, customer_id, corpus_id):
    """Return the JSON body for one summarised Vectara query."""
    return {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }


def _extract_sources(response_sets, per_set_limit=5):
    """Collect title/author/page metadata from the top hits of each response set."""
    # Metadata name in the API reply -> key used in the returned source dict.
    wanted = {'title': 'title', 'author': 'author', 'pageNumber': 'page number'}
    sources_info = []
    for response_set in response_sets:
        for source in response_set.get('response', [])[:per_set_limit]:
            source_info = {}
            for metadata in source.get('metadata', []):
                key = wanted.get(metadata.get('name', ''))
                if key is not None:
                    source_info[key] = metadata.get('value', '')
            # Hits without any of the wanted fields are skipped.
            if source_info:
                sources_info.append(source_info)
    return sources_info


def query_vectara(text):
    """Query the Vectara corpus with *text* and return the result as a string.

    Credentials are read from the ``CUSTOMER_ID``, ``CORPUS_ID`` and
    ``API_KEY`` environment variables.

    Returns:
        On success, a JSON string with ``"summary"`` and ``"sources"`` keys.
        Otherwise the plain-text message ``"No data found in the response."``
        or ``"Error: <status code>"``.
    """
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }
    # Plain HTTP call through `requests`; the timeout keeps a stalled API from
    # blocking the request forever.
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=_build_vectara_request(text, customer_id, corpus_id),
        verify=True,
        headers=api_key_header,
        timeout=120,
    )
    if response.status_code != 200:
        return f"Error: {response.status_code}"

    query_data = response.json()
    response_sets = (query_data or {}).get('responseSet') or []
    # A reply without a response set or without a summary is reported as
    # "no data" instead of failing on a missing key or index.
    summaries = (response_sets[0].get('summary') or []) if response_sets else []
    if not summaries:
        return "No data found in the response."

    result = {
        "summary": summaries[0].get('text', ''),
        "sources": _extract_sources(response_sets),
    }
    return json.dumps(result, indent=2)
def wrap_text(text, width=90):
    """Re-wrap each newline-separated line of *text* to at most *width* columns.

    Existing line breaks are kept; every line is wrapped on its own.
    """
    print("Wrapping text...")
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
# Load the Qwen chat checkpoint once at import time; eval() puts the model in
# inference mode.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen-1_8B-Chat",
    trust_remote_code=True,
)  # TruEra
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-1_8B-Chat",
    device_map="auto",
    trust_remote_code=True,
).eval()
class ChatBot:
    """Stateful wrapper around the module-level Qwen ``model`` / ``tokenizer``."""

    def __init__(self):
        # No conversation yet; ``model.chat`` receives None on the first call.
        self.history = None

    def predict(self, user_input, system_prompt=""):
        """Send *user_input* to the model and remember the updated history."""
        print("Generating prediction...")
        reply, self.history = model.chat(
            tokenizer,
            user_input,
            history=self.history,
            system=system_prompt,
        )
        return reply


# One shared instance serves every caller in this module.
# NOTE(review): its history therefore carries over between calls -- confirm
# that sharing context across requests is intended.
bot = ChatBot()
def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
    """Forward *user_input* and *system_prompt* to the shared chat bot."""
    print("Processing multimodal prompt...")
    answer = bot.predict(user_input, system_prompt)
    return answer
def process_summary_with_qwen(summary):
    """Have the shared chat bot elaborate on *summary* under an instructor prompt."""
    print("Processing summary with Qwen...")
    instructor_prompt = "You are a medical instructor. Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
    return bot.predict(summary, instructor_prompt)
def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
    """Run one user request through retrieval, generation and speech synthesis.

    Exactly one input is used, in this order of precedence: image, audio, text.
    The chosen input is turned into text, sent to Vectara, the summary is
    expanded by the Qwen chat model, converted to speech and scored for
    hallucination.

    Args:
        input_language: Language name chosen in the UI; used as the source
            language for audio input and as the target language for the
            spoken answer.
        audio_input: ``(sample_rate, samples)`` pair, or ``None``.
        image_input: Image array, or ``None``.
        text_input: Free-text question, or ``None``.

    Returns:
        A ``(markdown_text, audio_path)`` pair. ``audio_path`` is ``None``
        whenever processing stops early or fails.
    """
    try:
        print("Processing and querying...")
        combined_text = ""
        markdown_output = ""
        image_text = ""
        print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")
        # NOTE(review): only the first available input is used even though the
        # text prompt refers to "the query above" -- confirm whether inputs
        # were meant to be combined.
        if image_input is not None:
            print("Processing image input...")
            image_file_path = save_image(image_input)
            image_text = process_image(image_file_path)
            combined_text += "\n\n**Image Input:**\n" + image_text
        elif audio_input is not None:
            print("Processing audio input...")
            audio_file_path = save_audio(audio_input)
            audio_text = process_speech(audio_file_path, input_language, "English")
            combined_text += "\n\n**Audio Input:**\n" + audio_text
        elif text_input is not None and text_input.strip():
            print("Processing text input...")
            combined_text += "The user asks the query above to his health adviser: " + text_input
        else:
            # The handler feeds two output components, so always return a pair.
            return "Error: Please provide some input (text, audio, or image).", None

        if image_text:
            markdown_output += "\n### Original Image Description\n"
            markdown_output += image_text + "\n"

        print("Querying Vectara...")
        vectara_response_json = query_vectara(combined_text)
        try:
            vectara_response = json.loads(vectara_response_json)
        except json.JSONDecodeError:
            # query_vectara reports failures as plain text; show that message
            # instead of an opaque JSON parsing error.
            return f"Error occurred while querying Vectara: {vectara_response_json}", None
        summary = vectara_response.get('summary', 'No summary available')
        sources_info = vectara_response.get('sources', [])

        # Append (not assign) so the image description added above is kept.
        markdown_output += "### Vectara Response Summary\n"
        markdown_output += f"* **Summary**: {summary}\n"
        markdown_output += "### Sources Information\n"
        markdown_output += "".join(f"* {source}\n" for source in sources_info)

        final_response = process_summary_with_qwen(summary)

        print("Converting text to speech...")
        # The answer is passed as English text and spoken in the user's language.
        response_language = "English"
        audio_output, translated_text = convert_text_to_speech(final_response, response_language, input_language)

        print("Evaluating hallucination...")
        try:
            hallucination_label = evaluate_hallucination(final_response, summary)
        except Exception as e:
            # Scoring is best-effort: a failure here must not discard the answer.
            print(f"Error in hallucination evaluation: {e}")
            hallucination_label = "Evaluation skipped due to the model loading. For evaluation results, please try again in 29 minutes."

        markdown_output += "\n### Processed Summary with Qwen\n"
        markdown_output += final_response + "\n"
        markdown_output += "\n### Hallucination Evaluation\n"
        markdown_output += f"* **Label**: {hallucination_label}\n"
        markdown_output += "\n### Translated Text\n"
        markdown_output += translated_text + "\n"
        return markdown_output, audio_output
    except Exception as e:
        print(f"Error occurred: {e}")
        return f"Error occurred during processing: {e}.", None
def clear():
    """Return reset values for the five components wired to the Clear button.

    Order: language dropdown, audio input, image input, text output, audio output.
    """
    default_language = "English"
    empty_markdown = ""
    return default_language, None, None, empty_markdown, None
def create_interface():
    """Build and return the Gradio Blocks UI for the application.

    NOTE(review): the original indentation was lost, so the nesting below
    (outputs and buttons placed outside the accordions) is reconstructed from
    the statement order -- confirm against the deployed layout.
    """
    with gr.Blocks(theme='ParityError/Anime') as interface:
        # Welcome banner.
        gr.Markdown(welcome_message)

        # Language picker: a "None" entry followed by every supported language.
        input_language_options = ["None"] + list(languages.keys())
        input_language = gr.Dropdown(input_language_options, label="Select the language", value="English", interactive=True)

        with gr.Accordion("Use Voice", open=False):
            audio_input = gr.Audio(label="Speak")
            # Placeholder markdown; no event handler is attached to it.
            gr.Markdown(label="Output text")
            gr.Examples([["audio1.wav"], ["audio2.wav"]], inputs=[audio_input])

        with gr.Accordion("Use a Picture", open=False):
            image_input = gr.Image(label="Upload image")
            # Placeholder markdown; no event handler is attached to it.
            gr.Markdown(label="Output text")
            gr.Examples([["image1.png"], ["image2.jpeg"], ["image3.jpeg"]], inputs=[image_input])

        with gr.Accordion("MultiMed", open=False):
            text_input = gr.Textbox(label="Use Text", lines=3, placeholder="I have had a sore throat and phlegm for a few days and now my cough has gotten worse!")
            example_questions = [
                ["What is the proper treatment for buccal herpes?"],
                ["I have had a sore throat and hoarse voice for several days and now a strong cough recently "],
                ["How does cellular metabolism work TCA cycle"],
                ["What special care must be provided to children with chicken pox?"],
                ["When and how often should I wash my hands?"],
                ["بکل ہرپس کا صحیح علاج کیا ہے؟"],
                ["구강 헤르페스의 적절한 치료법은 무엇입니까?"],
                ["Je, ni matibabu gani sahihi kwa herpes ya buccal?"],
            ]
            gr.Examples(example_questions, inputs=[text_input])

        # Result components shared by every input mode.
        text_output = gr.Markdown(label="MultiMed")
        audio_output = gr.Audio(label="Audio Out", type="filepath")

        text_button = gr.Button("Use MultiMed")
        text_button.click(
            process_and_query,
            inputs=[input_language, audio_input, image_input, text_input],
            outputs=[text_output, audio_output],
        )

        clear_button = gr.Button("Clear")
        clear_button.click(
            clear,
            inputs=[],
            outputs=[input_language, audio_input, image_input, text_output, audio_output],
        )
    return interface
# Build the UI and start serving it, with errors shown in the browser and
# debug mode enabled.
app = create_interface()
app.launch(debug=True, show_error=True)