File size: 7,241 Bytes
c2c9efa 25557b5 841e241 ca2b34f c2c9efa 8e404a5 6679087 07448fb e611814 8e404a5 e611814 4289e9d e611814 023a289 c1fc7f4 7a0e5b8 e611814 8e404a5 6679087 e611814 8e404a5 6679087 e611814 05c90f4 9c39267 3caeacd bf6ab81 3caeacd ca2b34f 9c39267 ca2b34f 9c39267 7379857 3caeacd 5009abb 3caeacd 71dfe85 3caeacd ca2b34f 7379857 3caeacd 7379857 71dfe85 7379857 c8b695a 07448fb bd858f5 7379857 25557b5 1f43e72 9c39267 3caeacd 9c39267 3caeacd 9c39267 99aea78 5b4c5f8 3caeacd 9c39267 ddc25db 9c39267 ca2b34f 8f68cc2 9c39267 6679087 9c39267 6679087 7379857 9c39267 54202cb 9c39267 54202cb 7379857 3caeacd ca2b34f 3caeacd c8b695a 1c1cb58 c8b695a 7379857 99aea78 bd858f5 7379857 0d84f54 7379857 eec78c0 7379857 e611814 07448fb 1c1cb58 07448fb e611814 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
from functools import partial
import gradio as gr
from src.constants import SUBTASKS, TASKS
from src.details import update_subtasks_component, update_load_details_component, load_details_dataframes, \
display_details, update_sample_idx_component, clear_details, update_task_description_component
from src.results import update_load_results_component, \
load_results_dataframes, display_results, update_tasks_component, clear_results, \
sort_result_paths_per_model, fetch_result_paths
# NOTE: there is no `__main__` guard; everything below, including the final
# `demo.launch()`, runs as soon as this module is executed.

# Resolve the result paths once at start-up; the keys of this mapping are the
# model choices offered by both dropdowns below.
result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())
# Pre-bind the paths so the Gradio callback only receives the two model ids.
load_results_dataframes = partial(load_results_dataframes, result_paths_per_model=result_paths_per_model)
model_choices = list(result_paths_per_model)

with gr.Blocks(fill_height=True, fill_width=True) as demo:
    # ---- Header -----------------------------------------------------------
    # NOTE(review): the emoji in the strings below were restored from
    # mis-encoded bytes; confirm they match the intended glyphs.
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")
    gr.HTML(
        "<p style='text-align: center; color:orange;'>⚠ This demo is a beta version and may contain bugs, "
        "performance issues, incomplete features, or unexpected behavior. We appreciate your understanding "
        "and welcome any feedback through the Community tab to help improve the final product.</p>"
    )
    gr.Markdown(
        "Compare Results of the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard). "
        "Check out the [documentation](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about) 📄 to find explanations on the evaluations used, their configuration parameters and details on the input/outputs for the models."
    )

    # ---- Model selection --------------------------------------------------
    # Each column pairs a model dropdown with a hidden dataframe that holds the
    # loaded results for that model.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_2 = gr.Dataframe(visible=False)

    # ---- Tabs -------------------------------------------------------------
    with gr.Row():
        with gr.Tab("Results"):
            load_results_btn = gr.Button("Load", interactive=False)
            clear_results_btn = gr.Button("Clear")
            results_task = gr.Radio(
                ["All"] + list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            results_task_description = gr.Textbox(
                label="Task Description",
                lines=3,
                visible=False,
            )
            results = gr.HTML()
        with gr.Tab("Configs"):
            load_configs_btn = gr.Button("Load", interactive=False)
            clear_configs_btn = gr.Button("Clear")
            configs_task = gr.Radio(
                ["All"] + list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            configs_task_description = gr.Textbox(
                label="Task Description",
                lines=3,
                visible=False,
            )
            configs = gr.HTML()
        with gr.Tab("Details"):
            details_task = gr.Radio(
                # Skip the entry whose second element is "leaderboard_gpqa"
                # (presumably its details cannot be loaded -- TODO confirm).
                [value for value in TASKS.values() if value[1] != "leaderboard_gpqa"],
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            details_task_description = gr.Textbox(
                label="Task Description",
                lines=3,
            )
            subtask = gr.Radio(
                # No task is selected yet, so this lookup uses the radio's
                # initial value; the choices are refreshed by the
                # `details_task.change` listener below.
                SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False
            )
            details = gr.HTML()
            # Hidden holders for the per-model details data.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            details_dataframe = gr.DataFrame(visible=False)

    # ---- Results / Configs wiring -----------------------------------------
    # Refresh both Load buttons whenever the user edits either model dropdown.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input],
        fn=update_load_results_component,
        outputs=[load_results_btn, load_configs_btn],
    )
    # Either Load button fills the two hidden dataframes, then updates both
    # task radios.
    gr.on(
        triggers=[load_results_btn.click, load_configs_btn.click],
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=[results_task, configs_task],
    )
    # Synchronize the results_task and configs_task radio buttons
    results_task.input(fn=lambda task: task, inputs=results_task, outputs=configs_task)
    configs_task.input(fn=lambda task: task, inputs=configs_task, outputs=results_task)
    # Update task descriptions (both tabs are driven by results_task)
    results_task.change(
        fn=update_task_description_component,
        inputs=results_task,
        outputs=results_task_description,
    ).then(
        fn=update_task_description_component,
        inputs=results_task,
        outputs=configs_task_description,
    )
    # Display results: re-render both HTML panes when the data or task changes
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, results_task.change],
        fn=display_results,
        inputs=[results_task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )
    # Either Clear button resets the selections, data, buttons and task radios.
    gr.on(
        triggers=[clear_results_btn.click, clear_configs_btn.click],
        fn=clear_results,
        outputs=[model_id_1, model_id_2, dataframe_1, dataframe_2, load_results_btn, load_configs_btn, results_task, configs_task],
    )

    # ---- Details wiring ---------------------------------------------------
    # Changing the task updates its description, then the subtask radio.
    details_task.change(
        fn=update_task_description_component,
        inputs=details_task,
        outputs=details_task_description,
    ).then(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )
    # Refresh the Load Details button on any user edit of models or (sub)task.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )
    # Load the details for both models, then update the sample index control.
    load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )
    # Re-render the details pane when the data or the sample index changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )
    clear_details_btn.click(
        fn=clear_details,
        outputs=[model_id_1, model_id_2, details_dataframe_1, details_dataframe_2, details_task, subtask, load_details_btn, sample_idx],
    )

demo.launch()
|