Update app.py
app.py CHANGED
@@ -6,6 +6,8 @@ from transformers import MllamaForConditionalGeneration, AutoProcessor
 from peft import PeftModel
 from huggingface_hub import login
 import spaces
+import json
+
 
 # Login to Hugging Face
 if "HF_TOKEN" not in os.environ:
@@ -20,15 +22,17 @@ processor = AutoProcessor.from_pretrained(base_model_path)
 model = MllamaForConditionalGeneration.from_pretrained(
     base_model_path,
     torch_dtype=torch.bfloat16,
-    device_map="
+    device_map="cuda",
 )
 model = PeftModel.from_pretrained(model, lora_weights_path)
+model.tie_weights()
+
 
 @spaces.GPU
-def inference(image, question):
+def inference(image):
     # Prepare input
     messages = [
-        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text":
+        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe the image in JSON"}]}
     ]
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
     inputs = processor(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
@@ -39,14 +43,34 @@ def inference(image, question):
 
     # Decode output
     result = processor.decode(output[0], skip_special_tokens=True)
-
+    json_str = result.strip().split("assistant\n")[1].strip()
+
+    try:
+        # First JSON parse to handle escaped JSON string
+        first_parse = json.loads(json_str)
+
+        try:
+            # Second JSON parse to get the actual JSON object
+            json_object = json.loads(first_parse)
+            # Return indented JSON string with 2 spaces
+            return json.dumps(json_object, indent=2)
+        except json.JSONDecodeError:
+            # If second parse fails, return the result of first parse indented
+            if isinstance(first_parse, (dict, list)):
+                return json.dumps(first_parse, indent=2)
+            return first_parse
+
+    except json.JSONDecodeError:
+        # If both JSON parses fail, return original string
+        return json_str
+
+    return None  # In case of unexpected errors
 
 # Create Gradio interface
 demo = gr.Interface(
     fn=inference,
     inputs=[
-        gr.Image(type="pil", label="Upload Image")
-        gr.Textbox(label="Enter your question")
+        gr.Image(type="pil", label="Upload Image")
     ],
     outputs=gr.Textbox(label="Response"),
     title="Image Analysis AI",
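
The core of this change is the double json.loads pass on the decoded output: the model may answer with a JSON object directly, or with a JSON-encoded string that itself contains JSON, and splitting on "assistant\n" keeps only the assistant turn produced after the chat template. A minimal standalone sketch of the parsing idea, not the Space's exact code (the sample strings below are invented for illustration, and the inner except also catches TypeError, which json.loads raises when the first parse already yielded a dict or list):

import json

def to_pretty_json(json_str):
    # Sketch of the commit's double-parse idea (illustration only).
    try:
        first_parse = json.loads(json_str)
        try:
            # Second parse handles the case where the model emitted JSON wrapped in a string.
            return json.dumps(json.loads(first_parse), indent=2)
        except (json.JSONDecodeError, TypeError):
            # Already a plain object/array after the first parse.
            if isinstance(first_parse, (dict, list)):
                return json.dumps(first_parse, indent=2)
            return first_parse
    except json.JSONDecodeError:
        # Not JSON at all: pass the raw text through unchanged.
        return json_str

print(to_pretty_json('{"object": "cat", "color": "black"}'))   # parsed once
print(to_pretty_json('"{\\"object\\": \\"cat\\"}"'))           # parsed twice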
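
Since inference() now takes only an image and hard-codes the "Describe the image in JSON" prompt (the question textbox is dropped from the Gradio inputs), a quick local smoke test outside the Space might look like the following; sample.jpg is a placeholder path and not part of the commit:

from PIL import Image

# Hypothetical check of the new single-argument signature; assumes the model,
# processor and inference() defined above are already loaded in this session.
image = Image.open("sample.jpg").convert("RGB")
print(inference(image))  # expected: indented JSON, or the raw model text if parsing fails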