jiuface committed on
Commit
bc3420d
1 Parent(s): 08430c8

add task prompt selection

app.py CHANGED
@@ -26,7 +26,7 @@ SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
 @spaces.GPU(duration=20)
 @torch.inference_mode()
 @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
-def process_image(image_input, text_input) -> Optional[Image.Image]:
+def process_image(image_input, task_prompt, text_input) -> Optional[Image.Image]:
     if not image_input:
         gr.Info("Please upload an image.")
         return None
@@ -34,14 +34,13 @@ def process_image(image_input, text_input) -> Optional[Image.Image]:
     if not text_input:
         gr.Info("Please enter a text prompt.")
         return None
-
     _, result = run_florence_inference(
         model=FLORENCE_MODEL,
         processor=FLORENCE_PROCESSOR,
         device=DEVICE,
         image=image_input,
-        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
-        text=text_input
+        task=task_prompt,
+        text=text_input
     )
     detections = sv.Detections.from_lmm(
         lmm=sv.LMM.FLORENCE_2,
@@ -52,41 +51,43 @@ def process_image(image_input, text_input) -> Optional[Image.Image]:
     if len(detections) == 0:
         gr.Info("No objects detected.")
         return None
-    return Image.fromarray(detections.mask[0].astype("uint8") * 255)
+    images = []
+    print("mask generated:", len(detections.mask))
+    for i in range(len(detections.mask)):
+        img = Image.fromarray(detections.mask[i].astype(np.uint8) * 255)
+        images.append(img)
+    return images
 
 
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            image_input_component = gr.Image(
-                type='pil', label='Upload image')
-            text_input_component = gr.Textbox(
-                label='Text prompt',
-                placeholder='Enter text prompts')
-            submit_button_component = gr.Button(
-                value='Submit', variant='primary')
+            image = gr.Image(type='pil', label='Upload image')
+            image_url = gr.Textbox(label='Image url', placeholder='Enter image url (optional)')
+            task_prompt = gr.Dropdown(
+                [
+                    "<CAPTION>",
+                    "<DETAILED_CAPTION>",
+                    "<MORE_DETAILED_CAPTION>",
+                    "<CAPTION_TO_PHRASE_GROUNDING>",
+                    "<OPEN_VOCABULARY_DETECTION>",
+                    "<DENSE_REGION_CAPTION>"
+                ], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="task prompts"
+            )
+            text_input_component = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
+            submit_button_component = gr.Button(value='Submit', variant='primary')
         with gr.Column():
-            image_output_component = gr.Image(label='Output mask')
+            image_output_component = gr.Gallery(label="Generated images")
 
     submit_button_component.click(
         fn=process_image,
         inputs=[
-            image_input_component,
+            image,
+            task_prompt,
             text_input_component
         ],
-        outputs=[
-            image_output_component,
-        ]
-    )
-    text_input_component.submit(
-        fn=process_image,
-        inputs=[
-            image_input_component,
-            text_input_component
-        ],
-        outputs=[
-            image_output_component,
-        ]
+        outputs=image_output_component
     )
+
 
 demo.launch(debug=False, show_error=True)
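
With this change, process_image takes the selected task prompt as a second argument and returns a list of PIL mask images for the Gallery component instead of a single image. A minimal way to smoke-test the new call order outside the UI is sketched below; the image path and prompt text are placeholder values, not part of the commit, and a CUDA device is assumed because the function keeps its @torch.autocast(device_type="cuda", ...) decorator.

from PIL import Image

# Hypothetical local check of the new (image, task, text) call order.
test_image = Image.open("example.jpg")            # placeholder path, not in the repo
masks = process_image(
    test_image,
    "<CAPTION_TO_PHRASE_GROUNDING>",              # one of the new dropdown choices
    "a dog"                                       # text prompt forwarded to Florence-2
)
if masks:                                         # None is returned on missing input or no detections
    for i, mask in enumerate(masks):
        mask.save(f"mask_{i}.png")                # each entry is a single-channel mask image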
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (125 Bytes).

utils/__pycache__/florence.cpython-310.pyc ADDED
Binary file (2.31 kB).

utils/__pycache__/sam.cpython-310.pyc ADDED
Binary file (1.39 kB).
utils/florence.py CHANGED
@@ -56,4 +56,5 @@ def run_florence_inference(
         generated_ids, skip_special_tokens=False)[0]
     response = processor.post_process_generation(
         generated_text, task=task, image_size=image.size)
+    print(generated_text, response)
     return generated_text, response
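
The added print is a debugging aid: it shows both the raw decoded string and the task-keyed dict that post_process_generation returns. For the default <CAPTION_TO_PHRASE_GROUNDING> task, the response dict is roughly shaped like the sketch below (coordinates and label are illustrative values only); sv.Detections.from_lmm then parses this structure into boxes and labels.

# Illustrative shape of `response` for the grounding task (values are made up):
response = {
    "<CAPTION_TO_PHRASE_GROUNDING>": {
        "bboxes": [[34.2, 16.0, 512.7, 410.5]],   # xyxy boxes in image pixels
        "labels": ["a dog"]                       # one label per box
    }
}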