gheinrich committed on
Commit
703aea5
1 Parent(s): 2420cd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -275,7 +275,7 @@ def infer_radio(image):
275
  masks = sam_generator.generate(image_array)
276
  overlay = visualize_anns(image_array, masks)
277
 
278
- return f"{features.shape}", pca_viz, overlay
279
 
280
 
281
 
@@ -284,24 +284,26 @@ title = """RADIO: Reduce All Domains Into One"""
284
  description = """
285
  # RADIO
286
 
287
- AM-RADIO is a framework to distill Large Vision Foundation models into a single one.
288
  RADIO, a new vision foundation model, excels across visual domains, serving as a superior replacement for vision backbones.
289
  Integrating CLIP variants, DINOv2, and SAM through distillation, it preserves unique features like text grounding and segmentation correspondence.
290
  Outperforming teachers in ImageNet zero-shot (+6.8%), kNN (+2.39%), and linear probing segmentation (+3.8%) and vision-language models (LLaVa 1.5 up to 1.5%), it scales to any resolution, supports non-square images.
291
 
292
  # Instructions
293
 
294
- Simply paste an image or pick one from the gallery of examples and then click the "Submit" button.
 
 
295
  """
296
 
297
  inputs = [
298
  gr.Image(type="pil")
299
  ]
300
 
301
- outputs = [
302
- gr.Textbox(label="Feature Shape"),
303
  gr.Image(label="PCA Feature Visalization"),
304
  gr.Image(label="SAM Masks"),
 
305
  ]
306
 
307
  # Create the Gradio interface
 
275
  masks = sam_generator.generate(image_array)
276
  overlay = visualize_anns(image_array, masks)
277
 
278
+ return pca_viz, overlay, f"{features.shape}"
279
 
280
 
281
 
 
284
  description = """
285
  # RADIO
286
 
287
+ [AM-RADIO](https://github.com/NVlabs/RADIO) is a framework to distill Large Vision Foundation models into a single one.
288
  RADIO, a new vision foundation model, excels across visual domains, serving as a superior replacement for vision backbones.
289
  Integrating CLIP variants, DINOv2, and SAM through distillation, it preserves unique features like text grounding and segmentation correspondence.
290
  Outperforming teachers in ImageNet zero-shot (+6.8%), kNN (+2.39%), and linear probing segmentation (+3.8%) and vision-language models (LLaVa 1.5 up to 1.5%), it scales to any resolution, supports non-square images.
291
 
292
  # Instructions
293
 
294
+ Paste an image into the input box or pick one from the gallery of examples and then click the "Submit" button.
295
+ The RADIO backbone features are processed with a PCA projection to 3 channels and displayed as an RGB image.
296
+ The SAM features are processed using the SAM decoder and shown as an overlay on top of the input image.
297
  """
298
 
299
  inputs = [
300
  gr.Image(type="pil")
301
  ]
302
 
303
+ outputs = [
 
304
  gr.Image(label="PCA Feature Visalization"),
305
  gr.Image(label="SAM Masks"),
306
+ gr.Textbox(label="Feature Shape"),
307
  ]
308
 
309
  # Create the Gradio interface