File size: 5,930 Bytes
c2c9efa 25557b5 841e241 c2c9efa 8e404a5 6679087 07448fb e611814 8e404a5 e611814 4289e9d e611814 023a289 7a0e5b8 e611814 8e404a5 6679087 e611814 8e404a5 6679087 e611814 05c90f4 9c39267 3caeacd bf6ab81 3caeacd 9c39267 7379857 3caeacd 71dfe85 3caeacd 71dfe85 3caeacd 7379857 3caeacd 7379857 71dfe85 7379857 c8b695a 07448fb bd858f5 7379857 25557b5 1f43e72 9c39267 3caeacd 9c39267 3caeacd 9c39267 99aea78 5b4c5f8 3caeacd 9c39267 ddc25db 9c39267 8f68cc2 9c39267 6679087 9c39267 6679087 7379857 9c39267 54202cb 9c39267 54202cb 7379857 3caeacd c8b695a 1c1cb58 c8b695a 7379857 99aea78 bd858f5 7379857 0d84f54 7379857 eec78c0 7379857 e611814 07448fb 1c1cb58 07448fb e611814 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
from functools import partial
import gradio as gr
from src.constants import SUBTASKS, TASKS
from src.details import update_subtasks_component, update_load_details_component, load_details_dataframes, \
display_details, update_sample_idx_component, clear_details
from src.results import update_load_results_component, \
load_results_dataframes, display_results, update_tasks_component, clear_results, \
sort_result_paths_per_model, fetch_result_paths
# if __name__ == "__main__":
# Collect the available result paths once, at import time, and arrange them per
# model; the keys of this mapping are offered as choices in the model dropdowns.
_fetched_result_paths = fetch_result_paths()
result_paths_per_model = sort_result_paths_per_model(_fetched_result_paths)

# Rebind the imported loader with the path index pre-filled, so the Gradio event
# that calls it only has to supply the two selected model ids.
load_results_dataframes = partial(
    load_results_dataframes,
    result_paths_per_model=result_paths_per_model,
)
# Gradio UI for comparing two models side by side.
#
# Layout: a header, two model dropdowns (each backed by a hidden Dataframe that
# holds the loaded data), and three tabs -- "Results", "Configs" and "Details".
# All event listeners are registered inside the Blocks context, which Gradio
# requires; only `demo.launch()` lives outside it.
with gr.Blocks(fill_height=True, fill_width=True) as demo:
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")
    gr.Markdown(
        "Compare Results of the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard). "
        "Check out the [documentation](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about) 📄 "
        "to find explanations on the evaluations used, their configuration parameters "
        "and details on the input/outputs for the models."
    )

    # --- Model selection -------------------------------------------------
    # Each column pairs a dropdown with an invisible Dataframe used purely as
    # state: loaders write into it and its `change` event drives rendering.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
            dataframe_2 = gr.Dataframe(visible=False)

    # --- Tabs ------------------------------------------------------------
    with gr.Row():
        with gr.Tab("Results"):
            load_results_btn = gr.Button("Load", interactive=False)
            clear_results_btn = gr.Button("Clear")
            results_task = gr.Radio(
                ["All"] + list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            results = gr.HTML()
        with gr.Tab("Configs"):
            load_configs_btn = gr.Button("Load", interactive=False)
            clear_configs_btn = gr.Button("Clear")
            configs_task = gr.Radio(
                ["All"] + list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            configs = gr.HTML()
        with gr.Tab("Details"):
            details_task = gr.Radio(
                list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            # No task is selected at build time, so `details_task.value` is None
            # here and the lookup yields no initial choices; the real choices
            # are supplied later by the `details_task.change` handler below.
            subtask = gr.Radio(
                SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False,
            )
            details = gr.HTML()
            # Hidden state holders for the per-model details tables.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            # NOTE(review): not referenced by any event below -- confirm whether
            # it is still needed.
            details_dataframe = gr.DataFrame(visible=False)

    # --- Results / Configs wiring ----------------------------------------
    # Any user edit to either model dropdown refreshes both "Load" buttons.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input],
        fn=update_load_results_component,
        outputs=[load_results_btn, load_configs_btn],
    )
    # Both "Load" buttons share one loader that fills the two hidden
    # dataframes; afterwards the task radios of both tabs are updated.
    gr.on(
        triggers=[load_results_btn.click, load_configs_btn.click],
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=[results_task, configs_task],
    )
    # Synchronize the results_task and configs_task radio buttons.
    # `.input` fires only on user interaction, so mirroring one radio into the
    # other does not bounce back and forth.
    results_task.input(fn=lambda task: task, inputs=results_task, outputs=configs_task)
    configs_task.input(fn=lambda task: task, inputs=configs_task, outputs=results_task)
    # Re-render both HTML panes when the loaded data or the selected task
    # changes. Only `results_task.change` is listed: configs_task is mirrored
    # into results_task above, and Gradio's `change` event also fires on
    # programmatic updates, so a selection made in either tab should reach here.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, results_task.change],
        fn=display_results,
        inputs=[results_task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )
    # Either "Clear" button lets `clear_results` overwrite the selectors, the
    # hidden dataframes, both "Load" buttons and both task radios.
    gr.on(
        triggers=[clear_results_btn.click, clear_configs_btn.click],
        fn=clear_results,
        outputs=[model_id_1, model_id_2, dataframe_1, dataframe_2, load_results_btn, load_configs_btn, results_task, configs_task],
    )

    # --- Details wiring --------------------------------------------------
    # Choosing a task updates the subtask radio.
    details_task.change(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )
    # The "Load Details" button is refreshed from the two model ids and the
    # subtask whenever the user touches any of the relevant selectors.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )
    # Load the details tables for both models, then update the sample index
    # control from the freshly loaded tables.
    load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )
    # Re-render the details pane when either table or the sample index changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )
    # "Clear Details" lets `clear_details` overwrite the selectors, the hidden
    # details tables, the task/subtask radios, the load button and the index.
    clear_details_btn.click(
        fn=clear_details,
        outputs=[model_id_1, model_id_2, details_dataframe_1, details_dataframe_2, details_task, subtask, load_details_btn, sample_idx],
    )

# Start the app as a module-level side effect, as the original script does.
demo.launch()
|