Spaces:

TIGER-Lab
/

MEGA-Bench

Running

File size: 7,491 Bytes

b2c8d29
8b2c873
8553d06
 
 
 
 
 
eeb88fb
 
09497a7
8553d06
eeb88fb
 
 
09497a7
 
8553d06
eeb88fb
8b2c873
 
b2c8d29
eeb88fb
 
 
09497a7
eeb88fb
 
 
8553d06
 
b2c8d29
 
8553d06
 
 
 
 
 
 
 
 
 
 
 
b2c8d29
eeb88fb
 
4301eca
 
eeb88fb
 
 
 
bc925b6
2a2ba62
4301eca
eeb88fb
 
 
 
 
 
 
b2c8d29
8553d06
eeb88fb
f724d2e
eeb88fb
8553d06
 
6a59158
8553d06
 
 
 
eeb88fb
8553d06
 
 
3d5ede1
8553d06
 
4301eca
1f300cb
8553d06
eeb88fb
 
 
 
 
4301eca
 
 
eeb88fb
 
 
 
 
3d5ede1
4301eca
1f300cb
eeb88fb
 
09497a7
eeb88fb
 
 
4301eca
eeb88fb
 
 
 
 
8553d06
eeb88fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2c8d29
8553d06
 
 
be407a0
b2c8d29
8553d06
b2c8d29
 
 
8553d06

import gradio as gr
from utils import MEGABenchEvalDataLoader
import os
from constants import *

# Resolve the directory that contains this script so the CSS assets can be
# located regardless of the process's working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Absolute paths to the two stylesheets shipped under static/css.
base_css_file = os.path.join(current_dir, "static", "css", "style.css")
table_css_file = os.path.join(current_dir, "static", "css", "table.css")

# Load both stylesheets into memory once at import time. The encoding is
# pinned to UTF-8 so that decoding does not depend on the host's locale
# default (open() otherwise falls back to the locale encoding, which can
# mis-decode or reject non-ASCII bytes).
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(table_css_file, "r", encoding="utf-8") as f:
    table_css = f.read()

# Initialize one data loader per leaderboard table.
# NOTE(review): unlike the CSS paths above, these paths are relative to the
# working directory rather than to this script - confirm the app is always
# launched from the directory that contains `static/`.
default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")

with gr.Blocks() as block:
    # Hidden HTML component carrying both stylesheets inside one <style> tag;
    # the table-refresh handlers list it among their outputs and rewrite it.
    combined_style_markup = f"<style>{base_css}\n{table_css}</style>"
    css_style = gr.HTML(combined_style_markup, visible=False)

    # Introductory text placed before the tab container.
    gr.Markdown(LEADERBOARD_INTRODUCTION)
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
            # Citation text box inside an accordion that starts collapsed.
            with gr.Row(), gr.Accordion("Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    elem_id="citation-button",
                    lines=10,
                )

            # Explanatory text placed between the citation and the table controls.
            gr.Markdown(TABLE_INTRODUCTION)

            # Radio toggle choosing which leaderboard table is displayed; the
            # first choice ("Default") is the initial selection.
            with gr.Row():
                table_choices = ["Default", "Single Image"]
                table_selector = gr.Radio(
                    choices=table_choices,
                    label=(
                        "Select table to display. "
                        "Default: all MEGA-Bench tasks; "
                        "Single Image: single-image tasks only."
                    ),
                    value=table_choices[0],
                )

            # Markdown captions, one per table. Each caption is written as adjacent
            # string literals (split after every "<br> ") that Python concatenates
            # into a single one-line string; "$...$" spans hold LaTeX source.
            default_caption = (
                "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> "
                "The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> "
                "Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> "
                "$\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> "
                "* indicates self-reported results from the model authors."
            )

            single_image_caption = (
                "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> "
                "This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> "
                "Compared to the default table, some models with only single-image support are added."
            )

            # Caption component, initialised with the Default-table caption.
            # "$" is registered as the LaTeX delimiter on both sides with
            # display=False.
            dollar_delimiter = {"left": "$", "right": "$", "display": False}
            caption_component = gr.Markdown(
                value=default_caption,
                latex_delimiters=[dollar_delimiter],
                elem_classes="table-caption",
            )

            # Dimension names offered by the Default loader; the first one is the
            # initial selection for both the radio and the table.
            super_group_names = list(default_loader.SUPER_GROUPS.keys())
            first_super_group = super_group_names[0]

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=super_group_names,
                    value=first_super_group,
                    label=(
                        "Select a dimension to display breakdown results. "
                        "We use different column colors to distinguish the "
                        "overall benchmark scores and breakdown results."
                    ),
                )
                model_group_selector = gr.Radio(
                    choices=list(BASE_MODEL_GROUPS.keys()),
                    value="All",
                    label="Select a model group",
                )

            # Initial table contents: Default loader, first dimension, "All" models.
            initial_headers, initial_data = default_loader.get_leaderboard_data(
                first_super_group, "All"
            )
            initial_column_count = len(initial_headers)
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                # First column numeric, second HTML, every remaining column numeric.
                datatype=["number", "html"] + ["number"] * (initial_column_count - 2),
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=2400,
                # Widths: 100px, 240px, three 160px columns, then 210px for the rest.
                column_widths=["100px", "240px", "160px", "160px", "160px"]
                + ["210px"] * (initial_column_count - 5),
            )

            def update_table_and_caption(table_type, super_group, model_group):
                """Build the refreshed table, caption and style markup.

                Args:
                    table_type: Table selector value. "Default" picks the Default
                        loader and caption; any other value picks the
                        single-image loader and caption.
                    super_group: Forwarded as the first argument of the loader's
                        ``get_leaderboard_data``.
                    model_group: Forwarded as the second argument of the loader's
                        ``get_leaderboard_data``.

                Returns:
                    A three-item list: a ``gr.Dataframe`` built from the loader's
                    headers and data, the caption string, and the ``<style>``
                    markup combining both stylesheets.
                """
                use_default = table_type == "Default"
                active_loader = default_loader if use_default else si_loader
                headers, data = active_loader.get_leaderboard_data(super_group, model_group)
                caption = default_caption if use_default else single_image_caption

                column_count = len(headers)
                refreshed_table = gr.Dataframe(
                    value=data,
                    headers=headers,
                    # First column numeric, second HTML, every remaining column numeric.
                    datatype=["number", "html"] + ["number"] * (column_count - 2),
                    interactive=False,
                    # Widths: 100px, 240px, three 160px columns, then 210px for the rest.
                    column_widths=["100px", "240px", "160px", "160px", "160px"]
                    + ["210px"] * (column_count - 5),
                )
                style_markup = f"<style>{base_css}\n{table_css}</style>"
                return [refreshed_table, caption, style_markup]

            def update_selectors(table_type):
                """Return radio components listing the chosen table's groups.

                Args:
                    table_type: Table selector value. "Default" selects the
                        Default loader; any other value selects the single-image
                        loader.

                Returns:
                    A two-item list of ``gr.Radio`` components whose choices are
                    the keys of the loader's ``SUPER_GROUPS`` and
                    ``MODEL_GROUPS`` respectively.
                """
                if table_type == "Default":
                    loader = default_loader
                else:
                    loader = si_loader

                super_group_radio = gr.Radio(choices=list(loader.SUPER_GROUPS.keys()))
                model_group_radio = gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
                return [super_group_radio, model_group_radio]

            refresh_button = gr.Button("Refresh")

            # Every table refresh reads the three selectors and writes the table,
            # the caption and the style element.
            refresh_inputs = [table_selector, super_group_selector, model_group_selector]
            refresh_outputs = [data_component, caption_component, css_style]

            # The button click and both group selectors trigger the same refresh;
            # each registration receives its own copy of the input/output lists.
            for register_listener in (
                refresh_button.click,
                super_group_selector.change,
                model_group_selector.change,
            ):
                register_listener(
                    fn=update_table_and_caption,
                    inputs=list(refresh_inputs),
                    outputs=list(refresh_outputs),
                )

            # Switching tables first swaps the selector choices, then refreshes
            # the table and caption.
            table_selector.change(
                fn=update_selectors,
                inputs=[table_selector],
                outputs=[super_group_selector, model_group_selector],
            ).then(
                fn=update_table_and_caption,
                inputs=list(refresh_inputs),
                outputs=list(refresh_outputs),
            )

        # Second tab: static Markdown taken from DATA_INFO.
        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        # Third tab: static Markdown taken from SUBMIT_INTRODUCTION, inside a row.
        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3), gr.Row():
            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")



# Launch the app only when this file is executed as a script (not on import);
# share=True is passed through to Gradio's launch().
if __name__ == "__main__":
    block.launch(share=True)