# --- Hugging Face file-viewer page residue (not part of the program) ---
# roopalgarg's picture
# update app to support additional IIW data release sets.
# b069388 verified
# raw
# history blame
# 4.74 kB
import html
import logging
import os
import random

import gradio as gr
from datasets import load_dataset
from huggingface_hub import login
# Best-effort login: the HF_TOKEN check below is the hard requirement, so a
# failed login() is logged instead of aborting start-up.
try:
    login()
except Exception as exc:
    # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit still
    # propagate, and the failure is visible in the logs instead of vanishing.
    logging.warning("huggingface_hub.login() failed; relying on HF_TOKEN: %s", exc)

auth_token = os.environ.get('HF_TOKEN', None)
if not auth_token:
    raise ValueError("could not authenticate the user.")

# Every UI selector label is also the dataset configuration name on the Hub.
_IIW_REPO_ID = 'google/imageinwords'
_IIW_CONFIG_NAMES = ("IIW-400", "DOCCI_Test", "LocNar_Eval", "CM_3600")

# NOTE(review): trust_remote_code=True lets the dataset repository's loading
# script run locally; keep it only while the repository is trusted.
_SELECTOR_TO_DATASET = {
    config_name: load_dataset(
        _IIW_REPO_ID,
        token=auth_token,
        trust_remote_code=True,
        name=config_name,
    )
    for config_name in _IIW_CONFIG_NAMES
}

# Per-dataset module-level names kept for backward compatibility.
iiw_400 = _SELECTOR_TO_DATASET["IIW-400"]
docci_test = _SELECTOR_TO_DATASET["DOCCI_Test"]
locnar_eval = _SELECTOR_TO_DATASET["LocNar_Eval"]
cm_3600 = _SELECTOR_TO_DATASET["CM_3600"]
# Emoji shown next to each rating metric. Written as escapes so the glyphs
# survive any re-encoding of this source file.
_RATING_EMOJI = {
    "Comprehensiveness": "\U0001F4DA",  # Book
    "Specificity": "\U0001F3AF",  # Bullseye
    "Hallucination": "\U0001F47B",  # Ghost
    "First few line(s) as tldr": "\U0001F50D",  # Magnifying Glass Tilted Left
    "Human Like": "\U0001F464",  # Bust in Silhouette
}


def _escape_html(value):
    """Return ``value`` as text that is safe inside HTML content or attributes."""
    return html.escape(str(value), quote=True)


def display_iiw_data_with_slider_change(dataset_type, index):
    """Build the HTML fragments for one example of the selected dataset.

    Args:
        dataset_type: Key into ``_SELECTOR_TO_DATASET`` (for example
            ``"IIW-400"``). ``"LocNar_Eval"`` is read from its
            ``"validation"`` split, every other dataset from ``"test"``.
        index: Position of the example inside that split. Coerced to ``int``
            so a float coming from a UI slider can still index the split.

    Returns:
        A 5-tuple of HTML strings
        ``(image_key_html, image_html, iiw_text, iiw_p5b_text, ratings)``.
        The last three are ``""`` when the example has no such field.

    Raises:
        KeyError: If ``dataset_type`` or a required field is missing.
    """
    dataset_split, image_key, image_url_key = "test", "image/key", "image/url"
    if dataset_type == "LocNar_Eval":
        dataset_split = "validation"
    if dataset_type == "DOCCI_Test":
        # DOCCI rows use different field names for the key and the image URL.
        image_url_key = "image/thumbnail_url"
        image_key = "image"

    index = int(index)
    logging.warning(f"SELECTION: {dataset_type} : {dataset_split}: {index}")
    data = _SELECTOR_TO_DATASET[dataset_type][dataset_split][index]

    # All dataset-provided values are escaped before being placed in markup.
    image_html = (
        f'<img src="{_escape_html(data[image_url_key])}" '
        f'style="width:100%; max-width:800px; height:auto;">'
    )
    image_key_html = (
        f"<p style='font-size: 10px'>Image Key: {_escape_html(data[image_key])}</p>"
    )

    iiw_text, iiw_p5b_text, ratings = "", "", ""
    if "IIW" in data:
        iiw_text = (
            "<h2>IIW Human-Authored Descriptions</h2>"
            f"<p style='font-size: 16px'>{_escape_html(data['IIW'])}</p>"
        )
    if "IIW-P5B" in data:
        iiw_p5b_text = (
            "<h2>IIW PaLI-5B Generated Descriptions</h2>"
            f"<p style='font-size: 16px'>{_escape_html(data['IIW-P5B'])}</p>"
        )
    if 'iiw-human-sxs-iiw-p5b' in data and data['iiw-human-sxs-iiw-p5b'] is not None:
        rating_parts = ["<h2>Ratings</h2>"]
        for key, value in data['iiw-human-sxs-iiw-p5b'].items():
            # Keys look like "metrics/<name>"; only the metric name is shown.
            key = key.split("metrics/")[-1]
            emoji = _RATING_EMOJI.get(key, "")
            rating_parts.append(
                f"<p style='font-size: 16px'>{emoji} "
                f"<strong>{_escape_html(key)}</strong>: {_escape_html(value)}</p>"
            )
        ratings = "".join(rating_parts)

    return image_key_html, image_html, iiw_text, iiw_p5b_text, ratings
def display_iiw_data_with_dataset_change(dataset_type, index):
    """Reset the view to the first example of a newly selected dataset.

    Args:
        dataset_type: Selector label of the dataset that was chosen.
        index: Ignored; accepted so the function matches the event inputs.
            The first example (index 0) is always shown.

    Returns:
        A 6-tuple: a new ``gr.Slider`` ranging over the dataset, followed by
        the five HTML strings from ``display_iiw_data_with_slider_change``.
    """
    last_position = max_index(dataset_type) - 1
    fresh_slider = gr.Slider(
        minimum=0,
        maximum=last_position,
        label="Dataset Size",
        value=0,
    )
    first_example = display_iiw_data_with_slider_change(dataset_type, index=0)
    return (fresh_slider, *first_example)
def max_index(dataset_type):
    """Return the number of examples in the split browsed for ``dataset_type``.

    Despite the name, this is the split's length, not its last valid index.
    ``"LocNar_Eval"`` is measured on ``"validation"``, everything else on
    ``"test"``.
    """
    split_name = "validation" if dataset_type == "LocNar_Eval" else "test"
    logging.warning(f"SELECTION: {dataset_type} : {split_name}")
    return len(_SELECTOR_TO_DATASET[dataset_type][split_name])
# NOTE(review): the nesting below is reconstructed from statement order; confirm
# the row/column layout against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("# ImageInWords: Unlocking Hyper-Detailed Image Descriptions")
    gr.Markdown("Slide across the slider to see various examples across the different IIW datasets.")

    with gr.Row():
        dataset_selector = gr.Radio(["IIW-400", "DOCCI_Test", "LocNar_Eval", "CM_3600"], value="IIW-400", label="IIW Datasets")
        # Creates the slider inside this row and fetches the initial example.
        slider, image_key_html, image_html, iiw_text, iiw_p5b_text, ratings = display_iiw_data_with_dataset_change(dataset_selector.value, index=0)

    with gr.Row():
        with gr.Column():
            image_output = gr.HTML(image_html)
        with gr.Column():
            image_key_output = gr.HTML(image_key_html)
            # Always create the three text outputs, even when the initial
            # example leaves them empty: the event handlers below reference
            # them unconditionally, so creating them conditionally would raise
            # NameError at start-up.
            iiw_text_output = gr.HTML(iiw_text)
            iiw_p5b_text_output = gr.HTML(iiw_p5b_text)
            ratings_output = gr.HTML(ratings)

    # Moving the slider re-renders the example at the chosen position.
    slider.change(display_iiw_data_with_slider_change, inputs=[dataset_selector, slider], outputs=[image_key_output, image_output, iiw_text_output, iiw_p5b_text_output, ratings_output])
    # Choosing another dataset also replaces the slider so its range matches.
    dataset_selector.change(display_iiw_data_with_dataset_change, inputs=[dataset_selector, slider], outputs=[slider, image_key_output, image_output, iiw_text_output, iiw_p5b_text_output, ratings_output])

demo.launch(debug=True)