|
import asyncio |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
|
|
import src.constants as constants |
|
from src.hub import glob, load_jsonlines_file |
|
|
|
|
|
def update_task_description_component(task):
    """Build the "Task Description" textbox for the given task.

    The text is the entry for ``task`` in ``constants.TASK_DESCRIPTIONS``
    (empty when the task is not listed) followed, after a blank line, by a
    fixed note on how to read the scores. When there is no entry, only the
    note is shown.

    Args:
        task: Key looked up in ``constants.TASK_DESCRIPTIONS``.

    Returns:
        A visible, six-line ``gr.Textbox`` holding the description.
    """
    score_note = "A higher score is a better score."
    task_text = constants.TASK_DESCRIPTIONS.get(task, "")
    if task_text:
        content = f"{task_text}\n\n{score_note}"
    else:
        content = score_note
    return gr.Textbox(content, label="Task Description", lines=6, visible=True)
|
|
|
|
|
def update_subtasks_component(task, profile: gr.OAuthProfile | None):
    """Build the login button and the subtask selector for the given task.

    The login button is visible only for the ``"leaderboard_gpqa"`` task.
    For that task the subtask choices are withheld (``None``) while
    ``profile`` is falsy; in every other case the choices come from
    ``constants.SUBTASKS``.

    Args:
        task: Selected task name.
        profile: OAuth profile of the current user, or ``None``
            (presumably meaning nobody is logged in -- confirm against the
            Gradio OAuth documentation).

    Returns:
        A ``(gr.LoginButton, gr.Radio)`` tuple; the radio has no selected
        value.
    """
    requires_login = task == "leaderboard_gpqa"
    if requires_login and not profile:
        choices = None
    else:
        choices = constants.SUBTASKS.get(task)

    login_button = gr.LoginButton(size="sm", visible=requires_login)
    subtask_radio = gr.Radio(
        choices=choices,
        info="Evaluation subtasks to be loaded",
        value=None,
    )
    return login_button, subtask_radio
|
|
|
|
|
def update_load_details_component(model_id_1, model_id_2, subtask):
    """Build the "Load Details" button, enabled only when loading is possible.

    The button is interactive when at least one model id is given and a
    subtask is selected (all judged by truthiness).

    Args:
        model_id_1: First model id (may be empty/``None``).
        model_id_2: Second model id (may be empty/``None``).
        subtask: Selected subtask (may be empty/``None``).

    Returns:
        A ``gr.Button`` labelled "Load Details".
    """
    has_model = bool(model_id_1 or model_id_2)
    can_load = has_model and bool(subtask)
    return gr.Button("Load Details", interactive=can_load)
|
|
|
|
|
def fetch_details_paths(model_id, subtask):
    """List the details files matching a model and subtask.

    The glob pattern is ``<dataset id>/**/<filename>``, where the dataset id
    is ``constants.DETAILS_DATASET_ID`` formatted with the model id (every
    ``/`` replaced by ``__``) and the filename is
    ``constants.DETAILS_FILENAME`` formatted with the subtask.

    Args:
        model_id: Model identifier; must be a string.
        subtask: Subtask name inserted into the filename template.

    Returns:
        Whatever ``glob`` (imported from ``src.hub``) returns for the pattern.
    """
    pattern = "/".join(
        (
            constants.DETAILS_DATASET_ID.format(model_name_sanitized=model_id.replace("/", "__")),
            "**",
            constants.DETAILS_FILENAME.format(subtask=subtask),
        )
    )
    return glob(pattern)
|
|
|
|
|
async def load_details_dataframe(model_id, subtask):
    """Load one model's details for a subtask into a dataframe.

    Args:
        model_id: Model identifier; nothing is loaded when falsy.
        subtask: Subtask name; nothing is loaded when falsy.

    Returns:
        A dataframe of the flattened JSON-lines records, sorted by
        ``doc_id`` and tagged with a ``model_name`` column, or ``None`` when
        an argument is missing or no details file is found.
    """
    if not (model_id and subtask):
        return None

    candidates = fetch_details_paths(model_id, subtask)
    if not candidates:
        return None

    # Several files may match; take the greatest path (presumably the most
    # recent run -- confirm against the dataset's path layout).
    chosen_path = max(candidates)
    records = await load_jsonlines_file(chosen_path)

    frame = pd.json_normalize(records).sort_values(by=["doc_id"])
    # Tag every row so downstream code can tell which model it came from.
    frame["model_name"] = model_id
    return frame
|
|
|
|
|
|
|
async def load_details_dataframes(subtask, *model_ids):
    """Concurrently load the details dataframes of several models.

    Args:
        subtask: Subtask name shared by all loads.
        *model_ids: Model identifiers, one load per id.

    Returns:
        A list with one ``load_details_dataframe`` result per model id, in
        the same order as ``model_ids``.
    """
    pending_loads = (load_details_dataframe(model_id, subtask) for model_id in model_ids)
    return await asyncio.gather(*pending_loads)
|
|
|
|
|
def display_details(sample_idx, show_only_differences, *dfs):
    """Render one sample side by side for every loaded model as an HTML table.

    Args:
        sample_idx: Positional row index of the sample to show.
        show_only_differences: When truthy, hide every field whose values do
            not differ from the first model's value.
        *dfs: Details dataframes. Only those that have a ``model_name``
            column and more than ``sample_idx`` rows contribute.

    Returns:
        The table as an HTML string (one column per model, one row per
        field), or ``None`` when no dataframe contributes.
    """
    samples = []
    for frame in dfs:
        if "model_name" in frame.columns and sample_idx < len(frame):
            samples.append(frame.iloc[sample_idx])
    if not samples:
        return None

    # Move each sample's model name out of the data and into the column header.
    columns = []
    for sample in samples:
        model_name = sample.pop("model_name")
        columns.append(sample.rename(model_name))
    table = pd.concat(columns, axis="columns")

    if show_only_differences:
        # A field "differs" when any model's value is unequal to the first model's.
        differs = table.ne(table.iloc[:, 0], axis=0).any(axis=1)
        hidden_fields = [field for field in table.index if not differs[field]]
    else:
        hidden_fields = []

    # CSS applied to every cell so that long text wraps.
    cell_styles = [
        {
            "selector": "td",
            "props": [("overflow-wrap", "break-word"), ("max-width", "1px")],
        }
    ]

    styler = table.style.format(escape="html", na_rep="")
    styler = styler.hide(hidden_fields)
    styler = styler.set_table_styles(cell_styles)
    return styler.to_html()
|
|
|
|
|
def update_sample_idx_component(*dfs):
    """Build the "Sample Index" number field bounded by the loaded data.

    The upper bound is the last valid row index of the longest dataframe.

    Args:
        *dfs: Details dataframes; ``None`` entries are ignored.

    Returns:
        A visible ``gr.Number`` reset to 0, with ``minimum=0`` and
        ``maximum`` set to the largest last-row index (never below 0).
    """
    last_indices = [len(df) - 1 for df in dfs if df is not None]
    # Clamp at 0: with no dataframes ``max`` of an empty sequence would raise,
    # and with only empty dataframes the bound would be -1, i.e. below the
    # declared minimum.
    maximum = max([0, *last_indices])
    return gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        maximum=maximum,
        visible=True,
    )
|
|
|
|
|
def clear_details():
    """Return the values that reset the details UI.

    Returns:
        An 8-tuple: six ``None`` values (presumably clearing six output
        components wired up by the caller -- confirm at the call site), a
        disabled "Load Details" button, and a hidden "Sample Index" number
        field reset to 0.
    """
    cleared_values = (None,) * 6
    load_button = gr.Button("Load Details", interactive=False)
    sample_index = gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        visible=False,
    )
    return (*cleared_values, load_button, sample_index)
|
|
|
|
|
def display_loading_message_for_details():
    """Return a centered "Loading..." HTML heading.

    Judging by the function name, it serves as a placeholder while details
    are being loaded.
    """
    loading_html = "<h3 style='text-align: center;'>Loading...</h3>"
    return loading_html
|
|