Commit d4d8547 (verified) · SixOpen committed
1 parent: 6f31b98

Update app.py

Files changed (1):
  1. app.py: +62, -22
app.py CHANGED
@@ -14,7 +14,6 @@ import cv2
 import io
 import uuid
 
-
 def workaround_fixed_get_imports(filename: str | os.PathLike) -> list[str]:
     if not str(filename).endswith("/modeling_florence2.py"):
         return get_imports(filename)
@@ -39,26 +38,67 @@ def run_example(task_prompt, image, text_input=None):
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
     return processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.size[0], image.size[1]))
 
-def plot_bbox(image, data):
-    img_draw = image.copy()
-    draw = ImageDraw.Draw(img_draw)
-    for bbox, label in zip(data['bboxes'], data['labels']):
+def fig_to_pil(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format='png', dpi=300, bbox_inches='tight')
+    buf.seek(0)
+    return Image.open(buf)
+
+def plot_bbox_img(image, data):
+    fig, ax = plt.subplots(figsize=(10, 10))
+    ax.imshow(image)
+
+    if 'bboxes' in data and 'labels' in data:
+        bboxes, labels = data['bboxes'], data['labels']
+    elif 'bboxes' in data and 'bboxes_labels' in data:
+        bboxes, labels = data['bboxes'], data['bboxes_labels']
+    else:
+        return fig_to_pil(fig)
+
+    for bbox, label in zip(bboxes, labels):
         x1, y1, x2, y2 = bbox
-        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
-        draw.text((x1, y1), label, fill="white")
-    return np.array(img_draw)
+        rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, edgecolor='indigo', facecolor='none')
+        ax.add_patch(rect)
+        plt.text(x1, y1, label, color='white', fontsize=10, bbox=dict(facecolor='indigo', alpha=0.8))
+
+    ax.axis('off')
+    return fig_to_pil(fig)
 
-def draw_polygons(image, prediction, fill_mask=False):
-    img_draw = image.copy()
-    draw = ImageDraw.Draw(img_draw)
+def draw_poly_img(image, prediction, fill_mask=False):
+    fig, ax = plt.subplots(figsize=(10, 10))
+    ax.imshow(image)
     for polygons, label in zip(prediction.get('polygons', []), prediction.get('labels', [])):
         color = random.choice(colormap)
         for polygon in polygons:
             if isinstance(polygon[0], (int, float)):
                 polygon = [(polygon[i], polygon[i+1]) for i in range(0, len(polygon), 2)]
-            draw.polygon(polygon, outline=color, fill=color if fill_mask else None)
+            poly = patches.Polygon(polygon, edgecolor=color, facecolor=color if fill_mask else 'none', alpha=0.5 if fill_mask else 1, linewidth=2)
+            ax.add_patch(poly)
             if polygon:
-                draw.text(polygon[0], label, fill="white")
+                plt.text(polygon[0][0], polygon[0][1], label, color='white', fontsize=10, bbox=dict(facecolor=color, alpha=0.8))
+    ax.axis('off')
+    return fig_to_pil(fig)
+
+def draw_ocr_bboxes(image, prediction):
+    fig, ax = plt.subplots(figsize=(10, 10))
+    ax.imshow(image)
+    bboxes, labels = prediction['quad_boxes'], prediction['labels']
+    for box, label in zip(bboxes, labels):
+        color = random.choice(colormap)
+        box_array = np.array(box).reshape(-1, 2)  # respect format
+        polygon = patches.Polygon(box_array, edgecolor=color, fill=False, linewidth=2)
+        ax.add_patch(polygon)
+        plt.text(box_array[0, 0], box_array[0, 1], label, color='white', fontsize=10, bbox=dict(facecolor=color, alpha=0.8))
+    ax.axis('off')
+    return fig_to_pil(fig)
+
+def plot_bbox(image, data):
+    img_draw = image.copy()
+    draw = ImageDraw.Draw(img_draw)
+    for bbox, label in zip(data['bboxes'], data['labels']):
+        x1, y1, x2, y2 = bbox
+        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
+        draw.text((x1, y1), label, fill="white")
     return np.array(img_draw)
 
 @spaces.GPU(duration=120)
@@ -151,17 +191,17 @@ def process_image(image, task, text):
         "Caption": ("<CAPTION>", lambda result: (result['<CAPTION>'], image)),
         "Detailed Caption": ("<DETAILED_CAPTION>", lambda result: (result['<DETAILED_CAPTION>'], image)),
         "More Detailed Caption": ("<MORE_DETAILED_CAPTION>", lambda result: (result['<MORE_DETAILED_CAPTION>'], image)),
-        "Caption to Phrase Grounding": ("<CAPTION_TO_PHRASE_GROUNDING>", lambda result: (str(result['<CAPTION_TO_PHRASE_GROUNDING>']), Image.fromarray(plot_bbox(image, result['<CAPTION_TO_PHRASE_GROUNDING>'])))),
-        "Object Detection": ("<OD>", lambda result: (str(result['<OD>']), Image.fromarray(plot_bbox(image, result['<OD>'])))),
-        "Dense Region Caption": ("<DENSE_REGION_CAPTION>", lambda result: (str(result['<DENSE_REGION_CAPTION>']), Image.fromarray(draw_polygons(image, result['<DENSE_REGION_CAPTION>'], fill_mask=True)))),
-        "Region Proposal": ("<REGION_PROPOSAL>", lambda result: (str(result['<REGION_PROPOSAL>']), Image.fromarray(plot_bbox(image, result['<REGION_PROPOSAL>'])))),
-        "Referring Expression Segmentation": ("<REFERRING_EXPRESSION_SEGMENTATION>", lambda result: (str(result['<REFERRING_EXPRESSION_SEGMENTATION>']), Image.fromarray(draw_polygons(image, result['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)))),
-        "Region to Segmentation": ("<REGION_TO_SEGMENTATION>", lambda result: (str(result['<REGION_TO_SEGMENTATION>']), Image.fromarray(draw_polygons(image, result['<REGION_TO_SEGMENTATION>'], fill_mask=True)))),
-        "Open Vocabulary Detection": ("<OPEN_VOCABULARY_DETECTION>", lambda result: (str(result['<OPEN_VOCABULARY_DETECTION>']), Image.fromarray(plot_bbox(image, result['<OPEN_VOCABULARY_DETECTION>'])))),
+        "Caption to Phrase Grounding": ("<CAPTION_TO_PHRASE_GROUNDING>", lambda result: (str(result['<CAPTION_TO_PHRASE_GROUNDING>']), plot_bbox_img(image, result['<CAPTION_TO_PHRASE_GROUNDING>']))),
+        "Object Detection": ("<OD>", lambda result: (str(result['<OD>']), plot_bbox_img(image, result['<OD>']))),
+        "Dense Region Caption": ("<DENSE_REGION_CAPTION>", lambda result: (str(result['<DENSE_REGION_CAPTION>']), plot_bbox_img(image, result['<DENSE_REGION_CAPTION>']))),
+        "Region Proposal": ("<REGION_PROPOSAL>", lambda result: (str(result['<REGION_PROPOSAL>']), plot_bbox_img(image, result['<REGION_PROPOSAL>']))),
+        "Referring Expression Segmentation": ("<REFERRING_EXPRESSION_SEGMENTATION>", lambda result: (str(result['<REFERRING_EXPRESSION_SEGMENTATION>']), draw_poly_img(image, result['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True))),
+        "Region to Segmentation": ("<REGION_TO_SEGMENTATION>", lambda result: (str(result['<REGION_TO_SEGMENTATION>']), draw_poly_img(image, result['<REGION_TO_SEGMENTATION>'], fill_mask=True))),
+        "Open Vocabulary Detection": ("<OPEN_VOCABULARY_DETECTION>", lambda result: (str(result['<OPEN_VOCABULARY_DETECTION>']), plot_bbox_img(image, result['<OPEN_VOCABULARY_DETECTION>']))),
         "Region to Category": ("<REGION_TO_CATEGORY>", lambda result: (result['<REGION_TO_CATEGORY>'], image)),
         "Region to Description": ("<REGION_TO_DESCRIPTION>", lambda result: (result['<REGION_TO_DESCRIPTION>'], image)),
         "OCR": ("<OCR>", lambda result: (result['<OCR>'], image)),
-        "OCR with Region": ("<OCR_WITH_REGION>", lambda result: (str(result['<OCR_WITH_REGION>']), Image.fromarray(plot_bbox(image, result['<OCR_WITH_REGION>'])))),
+        "OCR with Region": ("<OCR_WITH_REGION>", lambda result: (str(result['<OCR_WITH_REGION>']), draw_ocr_bboxes(image, result['<OCR_WITH_REGION>']))),
     }
 
     if task in task_mapping:
@@ -222,7 +262,7 @@ with gr.Blocks() as demo:
                 "A green car parked in front of a yellow building."
             ],
             [
-                "https://datasets-server.huggingface.co/assets/huggingface/documentation-images/--/566a43334e8b6331dddd8142495bc2f3209f32b0/--/default/validation/3/image/image.jpg?Expires=1718892641&Signature=GFpkyFBNrVf~Mq0jFjbpXWQLCOQblOm6Y1R57zl0tZOKWg5lfK8Jv1Tkxv35sMOARYDiJEE7C0hIp0fKazo1lYbv0ZTAKkwHUY2RroifVea4JRCyovJVptsmIZnlXkJU68N7bJhh8K07cu04G5mqaLRRehqDABKqEqgIdtBS5WcUXdoqkl0Fh2c8KN3GK9hZba9E6ZouBXhuffEEzykss1pIm6MW-WLx5l7~RXKu6BwcFq~6--3KoYVM4U~aEQdgTJg6P2ESH4DkEWN8Qpf~vaHBi2CZQSGurM1U0sZqIYrSLPaUov1h00MQMmnNEzMDZUeIq7~j07hVmwWgflQZeA__&Key-Pair-Id=K3EI6M078Z3AC3",
+                "http://ecx.images-amazon.com/images/I/51UUzBDAMsL.jpg?download=true",
                 "OCR",
                 ""
            ]
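
For reference, the core pattern this commit introduces is rendering annotations with matplotlib and converting the finished figure to a PIL image via an in-memory PNG buffer, so Gradio can display it directly. Below is a minimal, self-contained sketch of that pattern; only fig_to_pil mirrors the code above, while the image and the "<OD>"-style result dict are made-up placeholders, not output from the Space.

import io

import matplotlib
matplotlib.use("Agg")  # headless backend, as on a hosted Space
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

def fig_to_pil(fig):
    # Render the figure to an in-memory PNG, then reopen it as a PIL image.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=300, bbox_inches='tight')
    buf.seek(0)
    return Image.open(buf)

# Hypothetical "<OD>"-style result: boxes are [x1, y1, x2, y2] in pixels.
result = {'bboxes': [[40, 30, 220, 180]], 'labels': ['car']}
image = Image.new('RGB', (320, 240), 'gray')  # placeholder input image

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
for (x1, y1, x2, y2), label in zip(result['bboxes'], result['labels']):
    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   linewidth=2, edgecolor='indigo', facecolor='none'))
    ax.text(x1, y1, label, color='white', fontsize=10,
            bbox=dict(facecolor='indigo', alpha=0.8))
ax.axis('off')
annotated = fig_to_pil(fig)
plt.close(fig)  # the Space code leaves this to GC; closing avoids figure buildup
print(annotated.size)

The same buffer trick serves the OCR path: "<OCR_WITH_REGION>" quad boxes arrive as flat [x1, y1, ..., x4, y4] lists, which is why draw_ocr_bboxes reshapes each one to (4, 2) point pairs before building its Polygon patch.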