Spaces:

taesiri
/

BugsBunny-Llama-3.2-Base-Medium

Sleeping

App Files Files Community

taesiri committed on 16 days ago

Commit

cc14163

•

1 Parent(s): c19b490

update

Browse files

Files changed (1) hide show

app.py +9 -33

app.py CHANGED Viewed

@@ -6,18 +6,15 @@ from transformers import MllamaForConditionalGeneration, AutoProcessor
 from peft import PeftModel
 from huggingface_hub import login
 import spaces
-import json
 # Login to Hugging Face
 if "HF_TOKEN" not in os.environ:
-    raise ValueError(
-        "Please set the HF_TOKEN environment variable with your Hugging Face token"
-    )
 login(token=os.environ["HF_TOKEN"])
 # Load model and processor (do this outside the inference function to avoid reloading)
 base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-lora_weights_path = "taesiri/BunsBunny-LLama-3.2-11B-Vision-Instruct-DummyTask2"
 processor = AutoProcessor.from_pretrained(base_model_path)
 model = MllamaForConditionalGeneration.from_pretrained(
@@ -27,50 +24,29 @@ model = MllamaForConditionalGeneration.from_pretrained(
 )
 model = PeftModel.from_pretrained(model, lora_weights_path)
 @spaces.GPU
 def inference(image, question):
     # Prepare input
     messages = [
-        {
-            "role": "user",
-            "content": [{"type": "image"}, {"type": "text", "text": question}],
-        }
     ]
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(
-        image, input_text, add_special_tokens=False, return_tensors="pt"
-    ).to(model.device)
     # Run inference
     with torch.no_grad():
         output = model.generate(**inputs, max_new_tokens=2048)
     # Decode output
     result = processor.decode(output[0], skip_special_tokens=True)
-    # Try to extract and parse JSON from the response
-    try:
-        # Split the result to get content after "assistant"
-        text_after_assistant = result.strip().split("assistant\n")[1].strip()
-        # Attempt to parse as JSON (double-loaded)
-        json_data = json.loads(json.loads(text_after_assistant))
-        return json.dumps(json_data, indent=2)
-    except (IndexError, json.JSONDecodeError):
-        # If JSON parsing fails, return the text after "assistant" or the full result
-        try:
-            return result.strip().split("assistant\n")[1].strip()
-        except IndexError:
-            return result.strip()
 # Create Gradio interface
 demo = gr.Interface(
     fn=inference,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="Enter your question"),
     ],
     outputs=gr.Textbox(label="Response"),
     title="Image Analysis AI",
@@ -78,4 +54,4 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
-    demo.launch()

 from peft import PeftModel
 from huggingface_hub import login
 import spaces
 # Login to Hugging Face
 if "HF_TOKEN" not in os.environ:
+    raise ValueError("Please set the HF_TOKEN environment variable with your Hugging Face token")
 login(token=os.environ["HF_TOKEN"])
 # Load model and processor (do this outside the inference function to avoid reloading)
 base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+lora_weights_path = "taesiri/BungsBunny-LLama-3.2-11B-Vision-Instruct-Medium"
 processor = AutoProcessor.from_pretrained(base_model_path)
 model = MllamaForConditionalGeneration.from_pretrained(
 )
 model = PeftModel.from_pretrained(model, lora_weights_path)
 @spaces.GPU
 def inference(image, question):
     # Prepare input
     messages = [
+        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": question}]}
     ]
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = processor(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
     # Run inference
     with torch.no_grad():
         output = model.generate(**inputs, max_new_tokens=2048)
     # Decode output
     result = processor.decode(output[0], skip_special_tokens=True)
+    return result.strip().split("assistant\n")[1].strip()
 # Create Gradio interface
 demo = gr.Interface(
     fn=inference,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Enter your question")
     ],
     outputs=gr.Textbox(label="Response"),
     title="Image Analysis AI",
 )
 if __name__ == "__main__":
+    demo.launch()