Update app.py
Browse files
app.py
CHANGED
@@ -275,7 +275,7 @@ def infer_radio(image):
|
|
275 |
masks = sam_generator.generate(image_array)
|
276 |
overlay = visualize_anns(image_array, masks)
|
277 |
|
278 |
-
return f"{features.shape}"
|
279 |
|
280 |
|
281 |
|
@@ -284,24 +284,26 @@ title = """RADIO: Reduce All Domains Into One"""
|
|
284 |
description = """
|
285 |
# RADIO
|
286 |
|
287 |
-
AM-RADIO is a framework to distill Large Vision Foundation models into a single one.
|
288 |
RADIO, a new vision foundation model, excels across visual domains, serving as a superior replacement for vision backbones.
|
289 |
Integrating CLIP variants, DINOv2, and SAM through distillation, it preserves unique features like text grounding and segmentation correspondence.
|
290 |
It outperforms its teachers in ImageNet zero-shot (+6.8%), kNN (+2.39%), linear probing segmentation (+3.8%), and vision-language models (LLaVA 1.5, up to 1.5%); it scales to any resolution and supports non-square images.
|
291 |
|
292 |
# Instructions
|
293 |
|
294 |
-
|
|
|
|
|
295 |
"""
|
296 |
|
297 |
inputs = [
|
298 |
gr.Image(type="pil")
|
299 |
]
|
300 |
|
301 |
-
outputs = [
|
302 |
-
gr.Textbox(label="Feature Shape"),
|
303 |
gr.Image(label="PCA Feature Visualization"),
|
304 |
gr.Image(label="SAM Masks"),
|
|
|
305 |
]
|
306 |
|
307 |
# Create the Gradio interface
|
|
|
275 |
masks = sam_generator.generate(image_array)
|
276 |
overlay = visualize_anns(image_array, masks)
|
277 |
|
278 |
+
return pca_viz, overlay, f"{features.shape}"
|
279 |
|
280 |
|
281 |
|
|
|
284 |
description = """
|
285 |
# RADIO
|
286 |
|
287 |
+
[AM-RADIO](https://github.com/NVlabs/RADIO) is a framework to distill Large Vision Foundation models into a single one.
|
288 |
RADIO, a new vision foundation model, excels across visual domains, serving as a superior replacement for vision backbones.
|
289 |
Integrating CLIP variants, DINOv2, and SAM through distillation, it preserves unique features like text grounding and segmentation correspondence.
|
290 |
It outperforms its teachers in ImageNet zero-shot (+6.8%), kNN (+2.39%), linear probing segmentation (+3.8%), and vision-language models (LLaVA 1.5, up to 1.5%); it scales to any resolution and supports non-square images.
|
291 |
|
292 |
# Instructions
|
293 |
|
294 |
+
Paste an image into the input box or pick one from the gallery of examples and then click the "Submit" button.
|
295 |
+
The RADIO backbone features are processed with a PCA projection to 3 channels and displayed as an RGB image.
|
296 |
+
The SAM features are processed using the SAM decoder and shown as an overlay on top of the input image.
|
297 |
"""
|
298 |
|
299 |
inputs = [
|
300 |
gr.Image(type="pil")
|
301 |
]
|
302 |
|
303 |
+
outputs = [
|
|
|
304 |
gr.Image(label="PCA Feature Visualization"),
|
305 |
gr.Image(label="SAM Masks"),
|
306 |
+
gr.Textbox(label="Feature Shape"),
|
307 |
]
|
308 |
|
309 |
# Create the Gradio interface
|