Spaces:

sounar
/

ContactDoctor-API

Paused

App Files Files Community

sounar committed on Nov 18, 2024

Commit

72ff248

verified ·

1 Parent(s): 4646254

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -52

app.py CHANGED Viewed

@@ -1,69 +1,57 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import os
 # Retrieve the token from environment variables
-api_token = os.getenv("HF_TOKEN").strip()
-# Model name
-model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
-# Load the Hugging Face model and tokenizer with required arguments
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    token=api_token,  # Authenticate with Hugging Face token
-    trust_remote_code=True  # Allow custom code from the repository
 )
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    token=api_token,
     trust_remote_code=True,
-    device_map="auto",  # Efficient device allocation
-    torch_dtype=torch.float16  # Mixed precision for faster inference
 )
-# Define the function to process user input
-def generate_response(input_text):
-    try:
-        # Tokenize the input text
-        inputs = tokenizer(input_text, return_tensors="pt")
-        # Ensure input tensor is sent to the same device as the model
-        input_ids = inputs["input_ids"].to(model.device)
-        # Add batch dimension (if missing)
-        if len(input_ids.shape) == 1:  # If shape is (seq_len,)
-            input_ids = input_ids.unsqueeze(0)  # Add batch dimension: (1, seq_len)
-        # Generate a response using the model
-        outputs = model.generate(
-            input_ids,
-            max_length=256,  # Limit the output length
-            num_return_sequences=1,  # Generate a single response
-            temperature=0.7,  # Adjust for creativity vs. determinism
-            top_p=0.9,  # Nucleus sampling
-            top_k=50  # Top-k sampling
-        )
-        # Decode and return the generated text
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response
-    except Exception as e:
-        # Return error details in case of failure
-        return f"Error: {str(e)}"
-# Create a Gradio interface
 iface = gr.Interface(
-    fn=generate_response,
-    inputs="text",
-    outputs="text",
-    title="ContactDoctor Medical Assistant",
-    description="Provide input symptoms or queries and get AI-powered medical advice."
 )
-# Launch the Gradio app
 if __name__ == "__main__":
     iface.launch()

 # Retrieve the token from environment variables
+#api_token = os.getenv("HF_TOKEN").strip()
+import torch
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
+import gradio as gr
+# Load the model and tokenizer
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.float16,
 )
+model = AutoModel.from_pretrained(
+    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
+    quantization_config=bnb_config,
+    device_map="auto",
+    torch_dtype=torch.float16,
     trust_remote_code=True,
+    attn_implementation="flash_attention_2",
 )
+tokenizer = AutoTokenizer.from_pretrained(
+    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
+    trust_remote_code=True
+)
+# Define the function to handle the input
+def process_input(image, question):
+    image = Image.open(image).convert("RGB")
+    msgs = [{'role': 'user', 'content': [image, question]}]
+    res = model.chat(image=image, msgs=msgs, tokenizer=tokenizer, sampling=True, temperature=0.95, stream=True)
+    generated_text = ""
+    for new_text in res:
+        generated_text += new_text
+    return generated_text
+# Gradio interface
 iface = gr.Interface(
+    fn=process_input,
+    inputs=[
+        gr.Image(type="file", label="Upload Image"),
+        gr.Textbox(lines=2, label="Question")
+    ],
+    outputs=gr.Textbox(label="Generated Response"),
+    title="BioMedical MultiModal Llama",
+    description="Upload an image and ask a medical question."
 )
 if __name__ == "__main__":
     iface.launch()