Spaces:

spoorthibhat
/

Llava-Med

Paused

App Files Files Community

spoorthibhat committed on Dec 10, 2024

Commit

36cca9a

verified ·

1 Parent(s): e693582

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -47

app.py CHANGED Viewed

@@ -11,26 +11,17 @@ print(torch.cuda.is_available())
 print(os.system('python -m bitsandbytes'))
-import os
-import torch
-import warnings
-warnings.filterwarnings('ignore')
 import io
 from contextlib import redirect_stdout
-import gradio as gr
-from transformers import AutoTokenizer
-from llava.model.builder import load_pretrained_model
-from llava.mm_utils import get_model_name_from_path
 from llava.eval.run_llava import eval_model
-# Check CUDA availability with error handling
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Define the model path
 model_path = "Veda0718/llava-med-v1.5-mistral-7b-finetuned"
 kwargs = {"device_map": "auto"}
 kwargs['load_in_4bit'] = True
 kwargs['quantization_config'] = BitsAndBytesConfig(
@@ -42,48 +33,44 @@ kwargs['quantization_config'] = BitsAndBytesConfig(
 model = LlavaMistralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
 tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
-# Define the inference function
-def run_inference(image, question):
-    if model is None:
-        return "Model failed to load. Please check the logs."
-    args = type('Args', (), {
-        "model_path": model_path,
-        "model_base": None,
-        "image_file": image,
-        "query": question,
-        "conv_mode": None,
-        "sep": ",",
-        "temperature": 0,
-        "top_p": None,
-        "num_beams": 1,
-        "max_new_tokens": 256
-    })()
-    # Capture the printed output of eval_model
-    f = io.StringIO()
-    with redirect_stdout(f):
-        eval_model(args)
-    output = f.getvalue()
-    return output
-# Create the Gradio interface
 with gr.Blocks(theme=gr.themes.Monochrome()) as app:
     with gr.Column(scale=1):
-        gr.Markdown("<center><h1>LLaVA-Med</h1></center>")
         with gr.Row():
             image = gr.Image(type="filepath", scale=2)
-            question = gr.Textbox(placeholder="Enter a question", scale=3)
         with gr.Row():
-            answer = gr.Textbox(placeholder="Answer pops up here", scale=1)
         with gr.Row():
             btn = gr.Button("Run Inference", scale=1)
         btn.click(fn=run_inference, inputs=[image, question], outputs=answer)
-# Launch the app
-if __name__ == "__main__":
-    app.queue().launch(debug=True)

 print(os.system('python -m bitsandbytes'))
+import gradio as gr
 import io
 from contextlib import redirect_stdout
+import openai
+import torch
+from transformers import AutoTokenizer, BitsAndBytesConfig
+from llava.model import LlavaMistralForCausalLM
 from llava.eval.run_llava import eval_model
+# LLaVa-Med model setup
 model_path = "Veda0718/llava-med-v1.5-mistral-7b-finetuned"
 kwargs = {"device_map": "auto"}
 kwargs['load_in_4bit'] = True
 kwargs['quantization_config'] = BitsAndBytesConfig(
 model = LlavaMistralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
 tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
 with gr.Blocks(theme=gr.themes.Monochrome()) as app:
     with gr.Column(scale=1):
+        gr.Markdown("<center><h1>LLaVa-Med</h1></center>")
         with gr.Row():
             image = gr.Image(type="filepath", scale=2)
+            question = gr.Textbox(placeholder="Enter a question", label="Question", scale=3)
         with gr.Row():
+            answer = gr.Textbox(placeholder="Answer pops up here", label="Answer", scale=1)
+        def run_inference(image, question):
+            # Arguments for the model
+            args = type('Args', (), {
+                "model_path": model_path,
+                "model_base": None,
+                "image_file": image,
+                "query": question,
+                "conv_mode": None,
+                "sep": ",",
+                "temperature": 0,
+                "top_p": None,
+                "num_beams": 1,
+                "max_new_tokens": 512
+            })()
+            # Capture the printed output of eval_model
+            f = io.StringIO()
+            with redirect_stdout(f):
+                eval_model(args)
+            llava_med_result = f.getvalue()
+            print(llava_med_result)
+            return llava_med_result
         with gr.Row():
             btn = gr.Button("Run Inference", scale=1)
         btn.click(fn=run_inference, inputs=[image, question], outputs=answer)
+app.launch(debug=True, height=800, width="100%")