OmniParser

Paused

adamlu1 committed 28 days ago

Commit

0375f07

•

1 Parent(s): 2c16cb7

debug

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,8 +13,18 @@ from utils import check_ocr_box, get_yolo_model, get_caption_model_processor, ge
 import torch
 from PIL import Image
-yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt')
-caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence")
 platform = 'pc'
 if platform == 'pc':
     draw_bbox_config = {
@@ -51,10 +61,10 @@ MARKDOWN = """
 OmniParser is a screen parsing tool to convert general GUI screen to structured elements.
 """
-DEVICE = torch.device('cuda')
 # @spaces.GPU
-# @torch.inference_mode()
 # @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
 @spaces.GPU(duration=65)
 def process(

 import torch
 from PIL import Image
+# yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt')
+# caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence")
+from ultralytics import YOLO
+yolo_model = YOLO('weights/icon_detect/best.pt').to('cuda')
+from transformers import AutoProcessor, AutoModelForCausalLM
+processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("weights/icon_caption_florence", torch_dtype=torch.float16, trust_remote_code=True).to('cuda')
+caption_model_processor = {'processor': processor, 'model': model}
+print('finish loading model!!!')
 platform = 'pc'
 if platform == 'pc':
     draw_bbox_config = {
 OmniParser is a screen parsing tool to convert general GUI screen to structured elements.
 """
+# DEVICE = torch.device('cuda')
 # @spaces.GPU
+@torch.inference_mode()
 # @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
 @spaces.GPU(duration=65)
 def process(