taesiri committed
Commit a4b7e70
Parent: cc14163

Update app.py

Files changed (1): app.py (+30 -6)
app.py CHANGED
@@ -6,6 +6,8 @@ from transformers import MllamaForConditionalGeneration, AutoProcessor
 from peft import PeftModel
 from huggingface_hub import login
 import spaces
+import json
+
 
 # Login to Hugging Face
 if "HF_TOKEN" not in os.environ:
@@ -20,15 +22,17 @@ processor = AutoProcessor.from_pretrained(base_model_path)
 model = MllamaForConditionalGeneration.from_pretrained(
     base_model_path,
     torch_dtype=torch.bfloat16,
-    device_map="auto",
+    device_map="cuda",
 )
 model = PeftModel.from_pretrained(model, lora_weights_path)
+model.tie_weights()
+
 
 @spaces.GPU
-def inference(image, question):
+def inference(image):
     # Prepare input
     messages = [
-        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": question}]}
+        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe the image in JSON"}]}
     ]
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
     inputs = processor(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
@@ -39,14 +43,34 @@ def inference(image, question):
 
     # Decode output
     result = processor.decode(output[0], skip_special_tokens=True)
-    return result.strip().split("assistant\n")[1].strip()
+    json_str = result.strip().split("assistant\n")[1].strip()
+
+    try:
+        # First JSON parse to handle escaped JSON string
+        first_parse = json.loads(json_str)
+
+        try:
+            # Second JSON parse to get the actual JSON object
+            json_object = json.loads(first_parse)
+            # Return indented JSON string with 2 spaces
+            return json.dumps(json_object, indent=2)
+        except json.JSONDecodeError:
+            # If second parse fails, return the result of first parse indented
+            if isinstance(first_parse, (dict, list)):
+                return json.dumps(first_parse, indent=2)
+            return first_parse
+
+    except json.JSONDecodeError:
+        # If both JSON parses fail, return original string
+        return json_str
+
+    return None  # In case of unexpected errors
 
 # Create Gradio interface
 demo = gr.Interface(
     fn=inference,
     inputs=[
-        gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="Enter your question")
+        gr.Image(type="pil", label="Upload Image")
     ],
     outputs=gr.Textbox(label="Response"),
     title="Image Analysis AI",
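For context, the loading change pins the whole model to a single GPU (device_map="cuda" instead of "auto") and re-ties the embedding weights after the LoRA adapter is wrapped around the base model. A minimal sketch of the resulting load sequence, assuming illustrative repo IDs, since base_model_path and lora_weights_path are defined outside these hunks:

import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor
from peft import PeftModel

# Illustrative placeholders; the real repo IDs are set outside the diff hunks.
base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
lora_weights_path = "some-user/some-lora-adapter"

processor = AutoProcessor.from_pretrained(base_model_path)
model = MllamaForConditionalGeneration.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    device_map="cuda",  # place the whole model on one GPU instead of auto-sharding
)
model = PeftModel.from_pretrained(model, lora_weights_path)
model.tie_weights()  # re-tie input/output embeddings after the PEFT wrapper is applied

Whether "cuda" or "auto" is the better device map depends on the runtime; the sketch simply mirrors the committed settings.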
 
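The new return path parses the decoded text twice because the model may emit a JSON object wrapped in a quoted, escaped string: the first json.loads unescapes the string, the second builds the object. A standalone sketch of that pattern, with an illustrative extract_json name and sample inputs; as an assumption not in the committed code, the inner except also catches TypeError, since json.loads raises TypeError (not JSONDecodeError) when the first parse already yields a dict or list:

import json

def extract_json(raw: str) -> str:
    """Pretty-print output that may be escaped JSON, plain JSON, or free text."""
    try:
        first = json.loads(raw)  # first parse: '"{\"a\": 1}"' -> '{"a": 1}'
    except json.JSONDecodeError:
        return raw               # not JSON at all: pass the text through unchanged
    try:
        obj = json.loads(first)  # second parse: '{"a": 1}' -> {'a': 1}
        return json.dumps(obj, indent=2)
    except (json.JSONDecodeError, TypeError):  # TypeError: assumption, see lead-in
        # raw was already plain JSON, so the first parse produced the object itself
        if isinstance(first, (dict, list)):
            return json.dumps(first, indent=2)
        return str(first)

print(extract_json('"{\\"label\\": \\"cat\\"}"'))  # escaped JSON string
print(extract_json('{"label": "cat"}'))            # plain JSON object
print(extract_json("free-form text"))              # passthrough

Returning the original string when neither parse succeeds keeps the Gradio Textbox populated even when the model ignores the JSON instruction.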