|
from functools import partial |
|
|
|
import gradio as gr |
|
|
|
from src.constants import SUBTASKS, TASKS |
|
from src.details import update_subtasks_component, update_load_details_component, load_details_dataframes, \ |
|
display_details, update_sample_idx_component, clear_details |
|
from src.results import update_load_results_component, \ |
|
load_results_dataframes, display_results, update_tasks_component, clear_results, \ |
|
sort_result_paths_per_model, fetch_result_paths |
|
|
|
|
|
|
|
# Computed once, at import time: whatever fetch_result_paths() returns, passed
# through sort_result_paths_per_model(). The UI below uses the result as a
# mapping whose keys populate the two model dropdowns.
result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())

# Deliberately rebind the imported loader with `result_paths_per_model`
# pre-filled, so the event wiring only has to supply the two selected model ids.
load_results_dataframes = partial(
    load_results_dataframes,
    result_paths_per_model=result_paths_per_model,
)
|
|
|
# ---------------------------------------------------------------------------
# UI definition: two model selectors on top, then three tabs (Results,
# Configs, Details), followed by the event wiring that connects them.
# ---------------------------------------------------------------------------
with gr.Blocks(fill_height=True, fill_width=True) as demo:
    # --- Header -----------------------------------------------------------
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")
    gr.Markdown(
        "Compare Results of the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard). "
        "Check out the [documentation](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about) 📄 to find explanations on the evaluations used, their configuration parameters and details on the input/outputs for the models."
    )

    # Choice lists shared by several components; built once instead of
    # repeating the same expression for every component.
    model_choices = list(result_paths_per_model.keys())
    task_choices = ["All"] + list(TASKS.values())

    # --- Model selection --------------------------------------------------
    # Each column pairs a model dropdown with a hidden Dataframe that stores
    # the data loaded for that model (output of load_results_dataframes).
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_2 = gr.Dataframe(visible=False)

    # --- Tabs -------------------------------------------------------------
    with gr.Row():
        with gr.Tab("Results"):
            # Starts disabled; its state is driven by
            # update_load_results_component (see wiring below).
            load_results_btn = gr.Button("Load", interactive=False)
            clear_results_btn = gr.Button("Clear")
            results_task = gr.Radio(
                task_choices,
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            results = gr.HTML()

        with gr.Tab("Configs"):
            # Mirrors the Results tab: same buttons, same task selector.
            load_configs_btn = gr.Button("Load", interactive=False)
            clear_configs_btn = gr.Button("Clear")
            configs_task = gr.Radio(
                task_choices,
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            configs = gr.HTML()

        with gr.Tab("Details"):
            details_task = gr.Radio(
                list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            # No task is selected at build time, so this lookup uses
            # details_task's initial value; the choices are refreshed by
            # update_subtasks_component whenever details_task changes.
            subtask = gr.Radio(
                SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False,
            )
            details = gr.HTML()
            # Hidden storage for the per-model details data.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            # NOTE(review): not referenced by any event below; kept so the
            # component tree is unchanged -- confirm whether it can be removed.
            details_dataframe = gr.DataFrame(visible=False)

    # --- Wiring: Results / Configs -----------------------------------------
    # User input on either model dropdown -> update both Load buttons.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input],
        fn=update_load_results_component,
        outputs=[load_results_btn, load_configs_btn],
    )

    # Either Load button -> fill the two hidden dataframes, then update the
    # two task selectors.
    gr.on(
        triggers=[load_results_btn.click, load_configs_btn.click],
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=[results_task, configs_task],
    )

    # Keep the two task selectors in sync: user input on one is copied to the
    # other. `.input` (not `.change`) is used, so the programmatic copy does
    # not bounce back.
    results_task.input(fn=lambda task: task, inputs=results_task, outputs=configs_task)
    configs_task.input(fn=lambda task: task, inputs=configs_task, outputs=results_task)

    # Any change of the stored data or of the selected task -> re-render both
    # the Results and the Configs HTML panes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, results_task.change],
        fn=display_results,
        inputs=[results_task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )

    # Either Clear button -> clear_results supplies new values for the model
    # selectors, stored data, Load buttons and task selectors.
    gr.on(
        triggers=[clear_results_btn.click, clear_configs_btn.click],
        fn=clear_results,
        outputs=[
            model_id_1,
            model_id_2,
            dataframe_1,
            dataframe_2,
            load_results_btn,
            load_configs_btn,
            results_task,
            configs_task,
        ],
    )

    # --- Wiring: Details ----------------------------------------------------
    # Selected task changed -> update the subtask selector.
    details_task.change(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )

    # User input on the models, task or subtask -> update the Load Details
    # button.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )

    # Load Details -> fill the hidden details dataframes, then update the
    # sample-index input from them.
    load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )

    # Any change of the details data or of the sample index -> re-render the
    # Details HTML pane.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )

    # Clear Details -> clear_details supplies new values for the model
    # selectors, details data, task/subtask selectors, Load Details button
    # and sample index.
    clear_details_btn.click(
        fn=clear_details,
        outputs=[
            model_id_1,
            model_id_2,
            details_dataframe_1,
            details_dataframe_2,
            details_task,
            subtask,
            load_details_btn,
            sample_idx,
        ],
    )

# Start the app (runs at module level, as in the original script).
demo.launch()
|
|