praeclarumjj3 committed
Commit f525997
1 Parent(s): ca4c6a3

Update app.py

Files changed (1): app.py +2 -2
app.py CHANGED
@@ -371,8 +371,8 @@ description = "<p style='font-size: 16px; margin: 5px; font-weight: w300; text-a
 + "<p style='font-size: 12px; margin: 5px; font-weight: w300; text-align: center'><sup>*</sup>Equal Advising</p>" \
 + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/ola_vlm/' target='_blank'>Project Page</a> | <a href='https://youtu.be/' target='_blank'>Video</a> | <a href='https://arxiv.org/abs/2412.09585' target='_blank'>ArXiv</a> | <a href='https://github.com/SHI-Labs/OLA-VLM' target='_blank'>Github</a></p>" \
 + "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'>OLA-VLM introduces a new approach to distilling vision knowledge into the hidden representations of LLMs, utilizing target representations to advance visual perception in MLLMs.</p>" \
-+ "<p style='text-align: left; font-size: 12px; margin: 5px; font-weight: w300;'>In the demo, along with the chatting with OLA-VLM, you can also visualize the intermediate representations from selected layers of the LLM by clicking on the <code>Visualize Intermediate Representations</code> button! Note that our demo only supports single image input currently.</p>" \
-+ "<ul style='text-align: left; font-size: 12px; margin: 5px; font-weight: w300; padding: 0;'> \
++ "<p style='text-align: left; font-size: 14px; margin: 5px; font-weight: w300;'>In the demo, along with the chatting with OLA-VLM, you can also visualize the intermediate representations from selected layers of the LLM by clicking on the <code style='font-size: 14px;'>Visualize Intermediate Representations</code> button! Note that our demo only supports single image input currently.</p>" \
++ "<ul style='text-align: left; font-size: 14px; margin: 5px; font-weight: w300; padding: 0;'> \
 <li><b>depth</b>: Visualizes the depth information in the representations using the decoder from the <a href='https://github.com/DepthAnything/Depth-Anything-V2' target='_blank'>Depth-Anything-v2 model</a>.</li> \
 <li><b>seg</b>: Visualizes the segmentation information in the representations using the decoder from the <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>OneFormer model</a>.</li> \
 <li><b>gen</b>: Visualizes the general information of the representations using the <a href='https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip' target='_blank'>SD-2.1-unCLIP</a>. Note that the output is a variation of the input image due to the nature of unCLIP.</li> \