File size: 4,817 Bytes
c2c9efa 25557b5 841e241 c2c9efa 6679087 07448fb e611814 c2c9efa e611814 023a289 e611814 023a289 6679087 e611814 023a289 6679087 e611814 2436603 6679087 05c90f4 0e93f79 3caeacd bf6ab81 3caeacd 7379857 3caeacd 71dfe85 3caeacd 71dfe85 3caeacd 7379857 3caeacd 7379857 71dfe85 7379857 c8b695a 07448fb bd858f5 7379857 25557b5 1f43e72 3caeacd 99aea78 5b4c5f8 3caeacd 5b4c5f8 ddc25db 8f68cc2 6679087 7e19f96 6679087 7379857 54202cb 7379857 3caeacd c8b695a 7379857 99aea78 bd858f5 7379857 0d84f54 7379857 eec78c0 7379857 e611814 07448fb e611814 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from functools import partial
import gradio as gr
from src.constants import SUBTASKS, TASKS
from src.details import update_subtasks_component, update_load_details_component, load_details_dataframes, \
display_details, update_sample_idx_component, clear_details
from src.results import update_load_results_component, \
load_results_dataframes, display_results, update_tasks_component, clear_results, \
filter_latest_result_path_per_model, fetch_result_paths
# NOTE: the `if __name__ == "__main__":` guard is commented out in this file,
# so everything below (including `demo.launch()`) runs whenever the module is
# executed.

# Mapping whose keys are the model ids offered in the two dropdowns.
latest_result_path_per_model = filter_latest_result_path_per_model(
    fetch_result_paths()
)
# Rebind the imported loader with the mapping pre-filled, so the click handler
# below only needs to receive the two selected model ids.
load_results_dataframes = partial(
    load_results_dataframes,
    result_path_per_model=latest_result_path_per_model,
)

with gr.Blocks(fill_height=True) as demo:
    # ---- Header ---------------------------------------------------------
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")

    # ---- Model pickers --------------------------------------------------
    # Each column pairs a dropdown with a hidden dataframe that holds the
    # loaded results for that model.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(
                choices=list(latest_result_path_per_model),
                label="Models",
            )
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(
                choices=list(latest_result_path_per_model),
                label="Models",
            )
            dataframe_2 = gr.Dataframe(visible=False)

    # ---- Tabs -----------------------------------------------------------
    with gr.Row():
        # (An "All" tab placeholder is currently disabled.)
        with gr.Tab("Results"):
            load_results_btn = gr.Button("Load Results", interactive=False)
            clear_results_btn = gr.Button("Clear Results")
            task = gr.Radio(
                choices=["All", *TASKS.values()],
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            # Nested tabs: rendered results and rendered configs.
            with gr.Tab("Results"):
                results = gr.HTML()
            with gr.Tab("Configs"):
                configs = gr.HTML()

        with gr.Tab("Details"):
            details_task = gr.Radio(
                choices=list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            # Initial choices are looked up from the task radio's value at
            # build time; they are refreshed by the change handler below.
            subtask = gr.Radio(
                choices=SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False,
            )
            details = gr.HTML()
            # Hidden holders for the loaded per-model details.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            details_dataframe = gr.DataFrame(visible=False)

    # ---- Results wiring -------------------------------------------------
    # Changing either model selection updates the "Load Results" button.
    gr.on(
        triggers=[model_id_1.change, model_id_2.change],
        fn=update_load_results_component,
        outputs=load_results_btn,
    )

    # Load both result dataframes, then update the task selector.
    results_loaded = load_results_btn.click(
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    )
    results_loaded.then(
        fn=update_tasks_component,
        outputs=task,
    )

    # Re-render results/configs when the data or the selected task changes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, task.change],
        fn=display_results,
        inputs=[task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )

    clear_results_btn.click(
        fn=clear_results,
        outputs=[model_id_1, model_id_2, dataframe_1, dataframe_2, task],
    )

    # ---- Details wiring -------------------------------------------------
    # Picking a task updates the subtask radio.
    details_task.change(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )

    # Any change to the models, task or subtask updates "Load Details".
    gr.on(
        triggers=[
            model_id_1.change,
            model_id_2.change,
            subtask.change,
            details_task.change,
        ],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )

    # Load both details dataframes, then update the sample index input.
    details_loaded = load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    )
    details_loaded.then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )

    # Re-render the details view when the data or the sample index changes.
    gr.on(
        triggers=[
            details_dataframe_1.change,
            details_dataframe_2.change,
            sample_idx.change,
        ],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )

    clear_details_btn.click(
        fn=clear_details,
        outputs=[
            model_id_1,
            model_id_2,
            details_dataframe_1,
            details_dataframe_2,
            details_task,
            subtask,
            sample_idx,
        ],
    )

demo.launch()
|