|
from functools import partial |
|
|
|
import gradio as gr |
|
|
|
import src.constants as constants |
|
from src.details import ( |
|
clear_details, |
|
display_details, |
|
display_loading_message_for_details, |
|
load_details_dataframes, |
|
update_load_details_component, |
|
update_sample_idx_component, |
|
update_subtasks_component, |
|
update_task_description_component, |
|
) |
|
from src.results import ( |
|
clear_results, |
|
display_loading_message_for_results, |
|
display_results, |
|
fetch_result_paths, |
|
load_results_dataframes, |
|
sort_result_paths_per_model, |
|
update_load_results_component, |
|
update_tasks_component, |
|
) |
|
|
|
|
|
|
|
# Resolve the available result paths once, when the module is loaded. The keys
# of the resulting mapping are what the two model dropdowns offer as choices.
_fetched_result_paths = fetch_result_paths()
result_paths_per_model = sort_result_paths_per_model(_fetched_result_paths)

# Rebind the imported loader with the mapping pre-filled, so the event chain
# only has to supply the two selected model ids.
load_results_dataframes = partial(
    load_results_dataframes,
    result_paths_per_model=result_paths_per_model,
)
|
|
|
# Build the comparison UI: a header, two model selectors, three tabs
# (Results / Configs / Details) and the event listeners that connect them.
# Everything, including the listeners, must be declared inside the Blocks context.
with gr.Blocks(fill_height=True, fill_width=True) as demo:
    # --- Header -----------------------------------------------------------
    # Emoji are written as escape sequences (plain ASCII in the source file) so
    # they cannot be corrupted again if the file is re-saved in another encoding.
    # \U0001F917 = hugging face, \u26a0 = warning sign, \U0001F4C4 = page facing up.
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the \U0001F917 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")
    gr.HTML(
        "<p style='text-align: center; color:orange;'>\u26a0 This demo is a beta version, "
        "and we're actively working on it, so you might find some tiny bugs! "
        "Please report any issues you have in the Community tab to help us make it better for all.</p>"
    )
    # NOTE(review): the emoji that follows the documentation link was
    # unrecoverable mojibake; "page facing up" is a best guess - confirm.
    gr.Markdown(
        "Compare Results of the \U0001F917 [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). "
        "Check out the [documentation](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about) \U0001F4C4 "
        "to find explanations on the evaluations used, their configuration parameters "
        "and details on the input/outputs for the models."
    )

    # --- Model selection --------------------------------------------------
    # One column per model. Each column also owns an invisible Dataframe that
    # carries the loaded results between event handlers.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
            dataframe_2 = gr.Dataframe(visible=False)

    # --- Tabs -------------------------------------------------------------
    with gr.Row():
        with gr.Tab("Results"):
            # The Load button starts disabled; a listener below updates it.
            load_results_btn = gr.Button("Load", interactive=False)
            clear_results_btn = gr.Button("Clear")
            results_task = gr.Radio(
                ["All", *constants.TASKS.values()],
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            results_task_description = gr.Textbox(
                label="Task Description",
                lines=3,
                visible=False,
            )
            results = gr.HTML()

        with gr.Tab("Configs"):
            # Mirrors the Results tab; the two task selectors are kept in sync below.
            load_configs_btn = gr.Button("Load", interactive=False)
            clear_configs_btn = gr.Button("Clear")
            configs_task = gr.Radio(
                ["All", *constants.TASKS.values()],
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            configs_task_description = gr.Textbox(
                label="Task Description",
                lines=3,
                visible=False,
            )
            configs = gr.HTML()

        with gr.Tab("Details"):
            details_task = gr.Radio(
                # Offer every task except the one whose second element is
                # "leaderboard_gpqa" (presumably each value is a
                # (label, task id) pair - confirm against src.constants).
                [task for task in constants.TASKS.values() if task[1] != "leaderboard_gpqa"],
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            details_task_description = gr.Textbox(
                label="Task Description",
                lines=3,
            )
            # Created without choices; they are supplied by update_subtasks_component
            # once a task is selected (see the details_task.change chain below).
            subtask = gr.Radio(
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False,
            )
            details = gr.HTML()
            # Invisible holders for the per-model details data.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            # NOTE(review): not referenced by any listener in this section;
            # kept so the component tree stays unchanged.
            details_dataframe = gr.DataFrame(visible=False)

    # --- Results / Configs wiring -----------------------------------------
    # When the user edits either model dropdown, refresh both Load buttons.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input],
        fn=update_load_results_component,
        outputs=[load_results_btn, load_configs_btn],
    )

    # Either Load button runs the same three-step chain: show a loading
    # message, load both models' dataframes, then update the task selectors.
    gr.on(
        triggers=[load_results_btn.click, load_configs_btn.click],
        fn=display_loading_message_for_results,
        outputs=[results, configs],
    ).then(
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=[results_task, configs_task],
    )

    # Keep the two task selectors in sync: a user choice in one is copied to the other.
    results_task.input(fn=lambda task: task, inputs=results_task, outputs=configs_task)
    configs_task.input(fn=lambda task: task, inputs=configs_task, outputs=results_task)

    # Any change of the Results task refreshes the description shown in both tabs.
    results_task.change(
        fn=update_task_description_component,
        inputs=results_task,
        outputs=results_task_description,
    ).then(
        fn=update_task_description_component,
        inputs=results_task,
        outputs=configs_task_description,
    )

    # Re-render both output panes whenever the loaded data or the selected task changes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, results_task.change],
        fn=display_results,
        inputs=[results_task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )

    # Either Clear button resets the selectors, the data holders, the Load
    # buttons and the task selectors of both tabs.
    gr.on(
        triggers=[clear_results_btn.click, clear_configs_btn.click],
        fn=clear_results,
        outputs=[
            model_id_1,
            model_id_2,
            dataframe_1,
            dataframe_2,
            load_results_btn,
            load_configs_btn,
            results_task,
            configs_task,
        ],
    )

    # --- Details wiring ---------------------------------------------------
    # Selecting a task updates its description, then the subtask selector.
    details_task.change(
        fn=update_task_description_component,
        inputs=details_task,
        outputs=details_task_description,
    ).then(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )

    # Re-evaluate the "Load Details" button whenever the user edits any of its prerequisites.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )

    # Load chain: show a loading message, load both models' details for the
    # chosen subtask, then update the sample index control from the loaded data.
    load_details_btn.click(
        fn=display_loading_message_for_details,
        outputs=details,
    ).then(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )

    # Re-render the details pane whenever the data or the selected sample changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )

    # Reset everything that belongs to the Details tab, plus the shared model selectors.
    clear_details_btn.click(
        fn=clear_details,
        outputs=[
            model_id_1,
            model_id_2,
            details_dataframe_1,
            details_dataframe_2,
            details_task,
            subtask,
            load_details_btn,
            sample_idx,
        ],
    )
|
|
|
if __name__ == "__main__":
    # Start the web server only when this file is executed as a script, so that
    # importing the module merely builds `demo` without launching anything.
    demo.launch()
|
|