File size: 4,772 Bytes
c2c9efa
 
25557b5
 
841e241
 
 
c2c9efa
 
 
6679087
07448fb
e611814
 
c2c9efa
e611814
 
 
023a289
e611814
 
 
023a289
6679087
e611814
023a289
6679087
e611814
 
2436603
6679087
05c90f4
0e93f79
 
3caeacd
 
 
 
 
bf6ab81
3caeacd
 
 
 
 
7379857
3caeacd
71dfe85
3caeacd
71dfe85
3caeacd
 
7379857
3caeacd
7379857
71dfe85
7379857
c8b695a
07448fb
bd858f5
 
 
 
 
 
 
7379857
 
 
 
25557b5
3caeacd
 
 
 
 
99aea78
 
 
5b4c5f8
3caeacd
5b4c5f8
ddc25db
8f68cc2
 
6679087
7e19f96
6679087
7379857
54202cb
 
 
 
7379857
3caeacd
 
 
 
 
c8b695a
 
 
 
 
 
7379857
99aea78
 
 
bd858f5
 
 
 
7379857
0d84f54
 
7379857
eec78c0
7379857
e611814
07448fb
 
 
 
e611814
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from functools import partial

import gradio as gr

from src.constants import SUBTASKS, TASKS
from src.details import update_subtasks_component, update_load_details_component, load_details_dataframes, \
    display_details, update_sample_idx_component, clear_details
from src.results import update_load_results_component, \
    load_results_dataframes, display_results, update_tasks_component, clear_results, \
    filter_latest_result_path_per_model, fetch_result_paths


# Module-level setup. The `if __name__ == "__main__":` guard is disabled, so
# everything below runs whenever this module is executed or imported.

# Fetch the available result paths, then reduce them to one entry per model.
# The keys of the resulting mapping are used as the model dropdown choices.
_fetched_result_paths = fetch_result_paths()
latest_result_path_per_model = filter_latest_result_path_per_model(_fetched_result_paths)

# Rebind the loader with the mapping pre-filled, so the click handler only has
# to supply the values of the two model dropdowns.
load_results_dataframes = partial(
    load_results_dataframes,
    result_path_per_model=latest_result_path_per_model,
)

# Both model selectors offer the same choices: the keys of the result-path mapping.
model_choices = list(latest_result_path_per_model)

with gr.Blocks(fill_height=True) as demo:
    # ------------------------------------------------------------------
    # Layout
    # ------------------------------------------------------------------
    gr.HTML(value="<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML(value="<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")

    # One column per model: a selector plus a hidden dataframe holding its results.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(label="Models", choices=model_choices)
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(label="Models", choices=model_choices)
            dataframe_2 = gr.Dataframe(visible=False)

    with gr.Row():
        # NOTE: an "All" tab was sketched here at some point but is not implemented.
        with gr.Tab(label="Results"):
            load_results_btn = gr.Button(value="Load Results", interactive=False)
            clear_results_btn = gr.Button(value="Clear Results")
            # Task filter; created hidden and updated after results are loaded.
            task = gr.Radio(
                choices=["All", *TASKS.values()],
                value="All",
                label="Tasks",
                info="Evaluation tasks to be displayed",
                visible=False,
            )
            with gr.Tab(label="Results"):
                results = gr.HTML()
            with gr.Tab(label="Configs"):
                configs = gr.HTML()
        with gr.Tab(label="Details"):
            details_task = gr.Radio(
                choices=list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            # Initial choices are looked up from the task radio's value at build
            # time; they are refreshed by the `details_task.change` handler below.
            subtask = gr.Radio(
                choices=SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button(value="Load Details", interactive=False)
            clear_details_btn = gr.Button(value="Clear Details")
            sample_idx = gr.Number(
                value=0,
                minimum=0,
                label="Sample Index",
                info="Index of the sample to be displayed",
                visible=False,
            )
            details = gr.HTML()
            # Hidden dataframes holding the loaded details of each model.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            # Not referenced by any event handler in this block.
            details_dataframe = gr.DataFrame(visible=False)

    # ------------------------------------------------------------------
    # Events: "Results" tab
    # ------------------------------------------------------------------
    # NOTE(review): only the first dropdown is wired here; changing the second
    # dropdown alone does not fire this handler.
    model_id_1.change(outputs=load_results_btn, fn=update_load_results_component)

    # Load both result dataframes, then update the task radio.
    results_loaded = load_results_btn.click(
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    )
    results_loaded.then(outputs=task, fn=update_tasks_component)

    # Re-render the results/configs HTML whenever the data or the task filter changes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, task.change],
        fn=display_results,
        inputs=[task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )

    clear_results_btn.click(
        fn=clear_results,
        outputs=[model_id_1, model_id_2, dataframe_1, dataframe_2, task],
    )

    # ------------------------------------------------------------------
    # Events: "Details" tab
    # ------------------------------------------------------------------
    details_task.change(fn=update_subtasks_component, inputs=details_task, outputs=subtask)

    # Update the "Load Details" button whenever a model, the task or the subtask changes.
    gr.on(
        triggers=[model_id_1.change, model_id_2.change, subtask.change, details_task.change],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )

    # Load both details dataframes, then update the sample index input.
    details_loaded = load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    )
    details_loaded.then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )

    # Re-render the details HTML whenever the data or the selected sample changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )

    clear_details_btn.click(
        fn=clear_details,
        outputs=[
            model_id_1,
            model_id_2,
            details_dataframe_1,
            details_dataframe_2,
            details_task,
            subtask,
            sample_idx,
        ],
    )

# Launch the Gradio app. There is no `__main__` guard around this call, so it
# runs unconditionally when the module is executed or imported.
demo.launch()