Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,6 @@ import cv2
|
|
14 |
import io
|
15 |
import uuid
|
16 |
|
17 |
-
|
18 |
def workaround_fixed_get_imports(filename: str | os.PathLike) -> list[str]:
|
19 |
if not str(filename).endswith("/modeling_florence2.py"):
|
20 |
return get_imports(filename)
|
@@ -39,26 +38,67 @@ def run_example(task_prompt, image, text_input=None):
|
|
39 |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
|
40 |
return processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.size[0], image.size[1]))
|
41 |
|
42 |
-
def
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
x1, y1, x2, y2 = bbox
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
50 |
|
51 |
-
def
|
52 |
-
|
53 |
-
|
54 |
for polygons, label in zip(prediction.get('polygons', []), prediction.get('labels', [])):
|
55 |
color = random.choice(colormap)
|
56 |
for polygon in polygons:
|
57 |
if isinstance(polygon[0], (int, float)):
|
58 |
polygon = [(polygon[i], polygon[i+1]) for i in range(0, len(polygon), 2)]
|
59 |
-
|
|
|
60 |
if polygon:
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
return np.array(img_draw)
|
63 |
|
64 |
@spaces.GPU(duration=120)
|
@@ -151,17 +191,17 @@ def process_image(image, task, text):
|
|
151 |
"Caption": ("<CAPTION>", lambda result: (result['<CAPTION>'], image)),
|
152 |
"Detailed Caption": ("<DETAILED_CAPTION>", lambda result: (result['<DETAILED_CAPTION>'], image)),
|
153 |
"More Detailed Caption": ("<MORE_DETAILED_CAPTION>", lambda result: (result['<MORE_DETAILED_CAPTION>'], image)),
|
154 |
-
"Caption to Phrase Grounding": ("<CAPTION_TO_PHRASE_GROUNDING>", lambda result: (str(result['<CAPTION_TO_PHRASE_GROUNDING>']),
|
155 |
-
"Object Detection": ("<OD>", lambda result: (str(result['<OD>']),
|
156 |
-
"Dense Region Caption": ("<DENSE_REGION_CAPTION>", lambda result: (str(result['<DENSE_REGION_CAPTION>']),
|
157 |
-
"Region Proposal": ("<REGION_PROPOSAL>", lambda result: (str(result['<REGION_PROPOSAL>']),
|
158 |
-
"Referring Expression Segmentation": ("<REFERRING_EXPRESSION_SEGMENTATION>", lambda result: (str(result['<REFERRING_EXPRESSION_SEGMENTATION>']),
|
159 |
-
"Region to Segmentation": ("<REGION_TO_SEGMENTATION>", lambda result: (str(result['<REGION_TO_SEGMENTATION>']),
|
160 |
-
"Open Vocabulary Detection": ("<OPEN_VOCABULARY_DETECTION>", lambda result: (str(result['<OPEN_VOCABULARY_DETECTION>']),
|
161 |
"Region to Category": ("<REGION_TO_CATEGORY>", lambda result: (result['<REGION_TO_CATEGORY>'], image)),
|
162 |
"Region to Description": ("<REGION_TO_DESCRIPTION>", lambda result: (result['<REGION_TO_DESCRIPTION>'], image)),
|
163 |
"OCR": ("<OCR>", lambda result: (result['<OCR>'], image)),
|
164 |
-
"OCR with Region": ("<OCR_WITH_REGION>", lambda result: (str(result['<OCR_WITH_REGION>']),
|
165 |
}
|
166 |
|
167 |
if task in task_mapping:
|
@@ -222,7 +262,7 @@ with gr.Blocks() as demo:
|
|
222 |
"A green car parked in front of a yellow building."
|
223 |
],
|
224 |
[
|
225 |
-
"
|
226 |
"OCR",
|
227 |
""
|
228 |
]
|
|
|
14 |
import io
|
15 |
import uuid
|
16 |
|
|
|
17 |
def workaround_fixed_get_imports(filename: str | os.PathLike) -> list[str]:
|
18 |
if not str(filename).endswith("/modeling_florence2.py"):
|
19 |
return get_imports(filename)
|
|
|
38 |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
|
39 |
return processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.size[0], image.size[1]))
|
40 |
|
41 |
+
def fig_to_pil(fig):
    """Render a Matplotlib figure to an image through an in-memory PNG.

    Args:
        fig: Figure to rasterize. It is closed once it has been saved, so
            callers must not reuse it afterwards.

    Returns:
        The object returned by ``Image.open`` on the PNG buffer.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format='png', dpi=300, bbox_inches='tight')
    finally:
        # Figures created through pyplot stay registered until they are
        # closed explicitly; without this every call leaks one figure.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
|
46 |
+
|
47 |
+
def plot_bbox_img(image, data):
    """Draw labelled bounding boxes over an image and return the rendering.

    Args:
        image: Image shown as the plot background via ``ax.imshow``.
        data: Mapping holding ``'bboxes'`` plus either ``'labels'`` or
            ``'bboxes_labels'``. Each box is unpacked as
            ``(x1, y1, x2, y2)``.

    Returns:
        The result of ``fig_to_pil`` for the figure. When ``data`` lacks the
        expected keys the image is rendered without any annotation.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)
    # Hide the axes up front so the no-annotation fallback looks the same as
    # the annotated output instead of showing tick marks.
    ax.axis('off')

    if 'bboxes' not in data:
        return fig_to_pil(fig)
    if 'labels' in data:
        labels = data['labels']
    elif 'bboxes_labels' in data:
        labels = data['bboxes_labels']
    else:
        return fig_to_pil(fig)

    for bbox, label in zip(data['bboxes'], labels):
        x1, y1, x2, y2 = bbox
        rect = patches.Rectangle(
            (x1, y1), x2 - x1, y2 - y1,
            linewidth=2, edgecolor='indigo', facecolor='none',
        )
        ax.add_patch(rect)
        # Draw on this figure's own axes rather than pyplot's global
        # "current axes", which another figure may have replaced.
        ax.text(x1, y1, label, color='white', fontsize=10,
                bbox=dict(facecolor='indigo', alpha=0.8))

    return fig_to_pil(fig)
|
66 |
|
67 |
+
def draw_poly_img(image, prediction, fill_mask=False):
    """Overlay labelled polygons on an image and return the rendering.

    Args:
        image: Image shown as the plot background via ``ax.imshow``.
        prediction: Mapping with optional ``'polygons'`` and ``'labels'``
            entries, iterated in lockstep. Each ``'polygons'`` item is a
            collection of polygons, given either as a flat
            ``[x0, y0, x1, y1, ...]`` list or as a list of ``(x, y)`` points.
        fill_mask: When true, polygons are filled with their outline colour
            at 50% opacity; otherwise only the outline is drawn.

    Returns:
        The result of ``fig_to_pil`` for the figure.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)

    groups = zip(prediction.get('polygons', []), prediction.get('labels', []))
    for polygons, label in groups:
        # One random colour per label group, shared by all of its polygons.
        color = random.choice(colormap)
        for polygon in polygons:
            # Check for emptiness before indexing: polygon[0] on an empty
            # entry would raise IndexError.
            if len(polygon) == 0:
                continue
            if isinstance(polygon[0], (int, float)):
                # Flat coordinate list: pair values up as (x, y). Stopping at
                # len - 1 drops a dangling trailing value instead of failing.
                polygon = [
                    (polygon[i], polygon[i + 1])
                    for i in range(0, len(polygon) - 1, 2)
                ]
                if not polygon:
                    continue
            poly = patches.Polygon(
                polygon,
                edgecolor=color,
                facecolor=color if fill_mask else 'none',
                alpha=0.5 if fill_mask else 1,
                linewidth=2,
            )
            ax.add_patch(poly)
            # Anchor the label at the first vertex, drawing on this figure's
            # own axes rather than pyplot's global "current axes".
            ax.text(polygon[0][0], polygon[0][1], label, color='white',
                    fontsize=10, bbox=dict(facecolor=color, alpha=0.8))

    ax.axis('off')
    return fig_to_pil(fig)
|
81 |
+
|
82 |
+
def draw_ocr_bboxes(image, prediction):
    """Outline OCR regions on an image, label them, and return the rendering.

    Args:
        image: Image shown as the plot background via ``ax.imshow``.
        prediction: Mapping with ``'quad_boxes'`` and ``'labels'`` entries,
            iterated in lockstep. Each box is reshaped into rows of
            ``(x, y)`` vertices.

    Returns:
        The result of ``fig_to_pil`` for the figure.

    Raises:
        KeyError: If ``prediction`` lacks ``'quad_boxes'`` or ``'labels'``.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)
    bboxes, labels = prediction['quad_boxes'], prediction['labels']
    for box, label in zip(bboxes, labels):
        color = random.choice(colormap)
        # Turn the coordinate sequence into an (N, 2) array of vertices.
        box_array = np.array(box).reshape(-1, 2)
        polygon = patches.Polygon(box_array, edgecolor=color, fill=False, linewidth=2)
        ax.add_patch(polygon)
        # Label at the first vertex, drawing on this figure's own axes
        # rather than pyplot's global "current axes".
        ax.text(box_array[0, 0], box_array[0, 1], label, color='white',
                fontsize=10, bbox=dict(facecolor=color, alpha=0.8))
    ax.axis('off')
    return fig_to_pil(fig)
|
94 |
+
|
95 |
+
def plot_bbox(image, data):
    """Return an array of ``image`` with red boxes and white labels drawn on.

    The input image is left untouched; drawing happens on a copy. ``data``
    must provide ``'bboxes'`` (each unpacked as four corner coordinates) and
    ``'labels'``, which are paired up in order.
    """
    annotated = image.copy()
    pen = ImageDraw.Draw(annotated)
    for (left, top, right, bottom), caption in zip(data['bboxes'], data['labels']):
        pen.rectangle([left, top, right, bottom], outline="red", width=2)
        # The label text starts at the box's first corner.
        pen.text((left, top), caption, fill="white")
    return np.array(annotated)
|
103 |
|
104 |
@spaces.GPU(duration=120)
|
|
|
191 |
"Caption": ("<CAPTION>", lambda result: (result['<CAPTION>'], image)),
|
192 |
"Detailed Caption": ("<DETAILED_CAPTION>", lambda result: (result['<DETAILED_CAPTION>'], image)),
|
193 |
"More Detailed Caption": ("<MORE_DETAILED_CAPTION>", lambda result: (result['<MORE_DETAILED_CAPTION>'], image)),
|
194 |
+
"Caption to Phrase Grounding": ("<CAPTION_TO_PHRASE_GROUNDING>", lambda result: (str(result['<CAPTION_TO_PHRASE_GROUNDING>']), plot_bbox_img(image, result['<CAPTION_TO_PHRASE_GROUNDING>']))),
|
195 |
+
"Object Detection": ("<OD>", lambda result: (str(result['<OD>']), plot_bbox_img(image, result['<OD>']))),
|
196 |
+
"Dense Region Caption": ("<DENSE_REGION_CAPTION>", lambda result: (str(result['<DENSE_REGION_CAPTION>']), plot_bbox_img(image, result['<DENSE_REGION_CAPTION>']))),
|
197 |
+
"Region Proposal": ("<REGION_PROPOSAL>", lambda result: (str(result['<REGION_PROPOSAL>']), plot_bbox_img(image, result['<REGION_PROPOSAL>']))),
|
198 |
+
"Referring Expression Segmentation": ("<REFERRING_EXPRESSION_SEGMENTATION>", lambda result: (str(result['<REFERRING_EXPRESSION_SEGMENTATION>']), draw_poly_img(image, result['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True))),
|
199 |
+
"Region to Segmentation": ("<REGION_TO_SEGMENTATION>", lambda result: (str(result['<REGION_TO_SEGMENTATION>']), draw_poly_img(image, result['<REGION_TO_SEGMENTATION>'], fill_mask=True))),
|
200 |
+
"Open Vocabulary Detection": ("<OPEN_VOCABULARY_DETECTION>", lambda result: (str(result['<OPEN_VOCABULARY_DETECTION>']), plot_bbox_img(image, result['<OPEN_VOCABULARY_DETECTION>']))),
|
201 |
"Region to Category": ("<REGION_TO_CATEGORY>", lambda result: (result['<REGION_TO_CATEGORY>'], image)),
|
202 |
"Region to Description": ("<REGION_TO_DESCRIPTION>", lambda result: (result['<REGION_TO_DESCRIPTION>'], image)),
|
203 |
"OCR": ("<OCR>", lambda result: (result['<OCR>'], image)),
|
204 |
+
"OCR with Region": ("<OCR_WITH_REGION>", lambda result: (str(result['<OCR_WITH_REGION>']), draw_ocr_bboxes(image, result['<OCR_WITH_REGION>']))),
|
205 |
}
|
206 |
|
207 |
if task in task_mapping:
|
|
|
262 |
"A green car parked in front of a yellow building."
|
263 |
],
|
264 |
[
|
265 |
+
"http://ecx.images-amazon.com/images/I/51UUzBDAMsL.jpg?download=true",
|
266 |
"OCR",
|
267 |
""
|
268 |
]
|