auto adj bbox width
- app.py +14 -29
- imgs/saved_image_demo.png +0 -0
app.py CHANGED
@@ -1,5 +1,5 @@
 from typing import Optional
-import spaces
+# import spaces
 
 import gradio as gr
 import numpy as np
@@ -25,31 +25,6 @@ caption_model_processor = {'processor': processor, 'model': model}
 print('finish loading model!!!')
 
 
-platform = 'pc'
-if platform == 'pc':
-    draw_bbox_config = {
-        'text_scale': 0.8,
-        'text_thickness': 2,
-        'text_padding': 2,
-        'thickness': 2,
-    }
-elif platform == 'web':
-    draw_bbox_config = {
-        'text_scale': 0.8,
-        'text_thickness': 2,
-        'text_padding': 3,
-        'thickness': 3,
-    }
-elif platform == 'mobile':
-    draw_bbox_config = {
-        'text_scale': 0.8,
-        'text_thickness': 2,
-        'text_padding': 3,
-        'thickness': 3,
-    }
-
-
-
 MARKDOWN = """
 # OmniParser for Pure Vision Based General GUI Agent 🔥
 <div>
@@ -59,6 +34,8 @@ MARKDOWN = """
 </div>
 
 OmniParser is a screen parsing tool to convert general GUI screen to structured elements.
+
+📢 [[Project Page](https://microsoft.github.io/OmniParser/)] [[Blog Post](https://www.microsoft.com/en-us/research/articles/omniparser-for-pure-vision-based-gui-agent/)] [[Models](https://huggingface.co/microsoft/OmniParser)]
 """
 
 # DEVICE = torch.device('cuda')
@@ -66,7 +43,7 @@ OmniParser is a screen parsing tool to convert general GUI screen to structured
 # @spaces.GPU
 @torch.inference_mode()
 # @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
-@spaces.GPU(duration=65)
+# @spaces.GPU(duration=65)
 def process(
     image_input,
     box_threshold,
@@ -76,6 +53,14 @@ def process(
     image_save_path = 'imgs/saved_image_demo.png'
     image_input.save(image_save_path)
     # import pdb; pdb.set_trace()
+    image = Image.open(image_save_path)
+    box_overlay_ratio = image.size[0] / 3200
+    draw_bbox_config = {
+        'text_scale': 0.8 * box_overlay_ratio,
+        'text_thickness': max(int(2 * box_overlay_ratio), 1),
+        'text_padding': max(int(3 * box_overlay_ratio), 1),
+        'thickness': max(int(3 * box_overlay_ratio), 1),
+    }
 
     ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=True)
     text, ocr_bbox = ocr_bbox_rslt
@@ -117,5 +102,5 @@ with gr.Blocks() as demo:
     )
 
     # demo.launch(debug=False, show_error=True, share=True)
-
-demo.queue().launch(share=False)
+demo.launch(share=True, server_port=7861, server_name='0.0.0.0')
+# demo.queue().launch(share=False)
imgs/saved_image_demo.png CHANGED
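Note on the change: instead of picking draw_bbox_config from fixed 'pc' / 'web' / 'mobile' presets at import time, the config is now computed per request inside process(), scaled by the uploaded screenshot's width against a 3200 px reference. A minimal sketch of that scaling rule follows; the helper name scaled_bbox_config and the 1920 px sample width are illustrative assumptions, not part of the diff, which computes the same dict inline.

# Sketch of the width-based scaling introduced by this commit (names are assumptions).
def scaled_bbox_config(image_width: int) -> dict:
    ratio = image_width / 3200  # 3200 px is the reference width used in the diff
    return {
        'text_scale': 0.8 * ratio,
        'text_thickness': max(int(2 * ratio), 1),
        'text_padding': max(int(3 * ratio), 1),
        'thickness': max(int(3 * ratio), 1),
    }

print(scaled_bbox_config(1920))
# roughly {'text_scale': 0.48, 'text_thickness': 1, 'text_padding': 1, 'thickness': 1}

The max(..., 1) guards keep line thickness and label padding at a minimum of one pixel for narrow screenshots, where the integer values would otherwise round down to zero.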