Spaces:
Runtime error
Runtime error
File size: 3,649 Bytes
15c8167 26e855f 15c8167 460930f 15c8167 460930f 15c8167 8e404a5 15c8167 8e404a5 15c8167 9c39267 15c8167 8e404a5 c2c9efa 8e404a5 460930f 8e404a5 15c8167 8e404a5 15c8167 6099782 15c8167 26e855f 15c8167 9c39267 15c8167 bf6ab81 9c39267 15c8167 9c39267 15c8167 9c39267 15c8167 26e855f a4b20f4 26e855f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import json
import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import HfFileSystem
from src.constants import RESULTS_DATASET_ID, TASKS
def fetch_result_paths():
    """List the JSON result files stored in the results dataset.

    Returns:
        Whatever ``HfFileSystem.glob`` yields for ``*.json`` files located
        below ``RESULTS_DATASET_ID``.
    """
    pattern = f"{RESULTS_DATASET_ID}/**/**/*.json"
    return HfFileSystem().glob(pattern)
def sort_result_paths_per_model(paths):
    """Group result paths by model and sort the paths of every model.

    The model identifier is the part of each path that lies between the
    leading ``RESULTS_DATASET_ID/`` prefix and the final path component.

    Args:
        paths: Iterable of result file paths, each starting with
            ``RESULTS_DATASET_ID`` followed by ``/``.

    Returns:
        Dict mapping each model identifier to its sorted list of paths.
    """
    grouped = {}
    for result_path in paths:
        # Drop "<dataset id>/" from the front, then split off the file name.
        relative_path = result_path[len(RESULTS_DATASET_ID) + 1:]
        model_id, _file_name = relative_path.rsplit("/", 1)
        grouped.setdefault(model_id, []).append(result_path)
    return {model_id: sorted(model_paths) for model_id, model_paths in grouped.items()}
def update_load_results_component():
    """Return an enabled "Load" button twice, one entry per output slot.

    Both tuple entries are the same ``gr.Button`` instance.
    """
    load_button = gr.Button("Load", interactive=True)
    return load_button, load_button
def load_results_dataframe(model_id, result_paths_per_model=None):
    """Load and merge every result file of one model into a one-row DataFrame.

    The ``"results"`` and ``"configs"`` sections of each file are merged in
    path order (later files override earlier keys) and flattened with
    ``pd.json_normalize`` into dotted column names.

    Args:
        model_id: Key into ``result_paths_per_model``; a falsy value means
            "no model selected".
        result_paths_per_model: Mapping from model id to a list of result
            file paths readable through ``HfFileSystem``.

    Returns:
        ``None`` when ``model_id`` or ``result_paths_per_model`` is falsy.
        Otherwise a single-row DataFrame whose ``"index"`` column holds the
        model name, taken from the ``"model_name"`` field of the last file
        read and falling back to ``"Model"``.

    Raises:
        KeyError: If ``model_id`` is not in ``result_paths_per_model`` or a
            file lacks the ``"results"`` / ``"configs"`` section.
    """
    if not model_id or not result_paths_per_model:
        return
    result_paths = result_paths_per_model[model_id]
    data = {"results": {}, "configs": {}}
    # Bind the fallback up front so an empty path list cannot leave the name
    # undefined when it is used after the loop.
    model_name = "Model"
    # Only build the Hub filesystem client when there is something to read.
    fs = HfFileSystem() if result_paths else None
    for path in result_paths:
        with fs.open(path, "r") as f:
            d = json.load(f)
        data["results"].update(d["results"])
        data["configs"].update(d["configs"])
        # The last file read decides the displayed model name.
        model_name = d.get("model_name", "Model")
    df = pd.json_normalize([data])
    # Label the single row with the model name, then expose that label as a
    # regular "index" column.
    return df.set_index(pd.Index([model_name])).reset_index()
def load_results_dataframes(*model_ids, result_paths_per_model=None):
    """Load one results DataFrame per model id, preserving argument order.

    Args:
        *model_ids: Model identifiers, each forwarded to
            ``load_results_dataframe``.
        result_paths_per_model: Mapping from model id to result file paths,
            forwarded unchanged.

    Returns:
        List with one entry per model id, in the same order; each entry is
        whatever ``load_results_dataframe`` returned for that id.
    """
    frames = []
    for model_id in model_ids:
        frame = load_results_dataframe(model_id, result_paths_per_model=result_paths_per_model)
        frames.append(frame)
    return frames
def display_results(task, *dfs):
    """Render the "results" and "configs" HTML tables for the loaded models.

    Args:
        task: Task filter forwarded to ``display_tab`` (``"All"`` or a task
            name).
        *dfs: Per-model DataFrames carrying an ``"index"`` column. ``None``
            entries and frames without an ``"index"`` column are ignored.

    Returns:
        Tuple ``(results_html, configs_html)``, or ``(None, None)`` when no
        usable DataFrame was supplied.
    """
    # ``load_results_dataframe`` returns None when no model is selected, so
    # None has to be skipped before touching ``.columns``.
    indexed = [
        df.set_index("index")
        for df in dfs
        if df is not None and "index" in df.columns
    ]
    if not indexed:
        return None, None
    # Stack one row per model, then transpose so that models become columns
    # and the flattened keys become rows.
    combined = pd.concat(indexed).T.rename_axis(columns=None)
    return display_tab("results", combined, task), display_tab("configs", combined, task)
def display_tab(tab, df, task):
    """Render the rows of one tab of ``df`` as a styled HTML table.

    Args:
        tab: Row-label prefix selecting the section to show (the callers in
            this file pass ``"results"`` or ``"configs"``).
        df: DataFrame whose index holds dotted string labels.
        task: ``"All"`` or a task name used to filter the rows.

    Returns:
        HTML string produced by the pandas Styler.
    """
    show_all = task == "All"

    def is_hidden(label):
        # Rows outside this tab, rows under "<tab>.leaderboard." and alias
        # rows are never shown.
        if not label.startswith(f"{tab}."):
            return True
        if label.startswith(f"{tab}.leaderboard."):
            return True
        if label.endswith(".alias"):
            return True
        if show_all:
            # In the "All" view the arc_challenge rows are left out.
            return label.startswith(f"{tab}.leaderboard_arc_challenge")
        return not label.startswith(f"{tab}.{task}")

    styler = df.style.format(na_rep="")
    hidden_rows = [label for label in styler.index if is_hidden(label)]
    styler.hide(hidden_rows, axis="index")
    styler.apply(highlight_min_max, axis=1)

    # Number of leading characters removed from every displayed row label.
    if show_all:
        start = len(f"{tab}.leaderboard_")
    else:
        start = len(f"{tab}.{task} ")

    def shorten(label):
        return label[start:].removesuffix(",none")

    styler.format_index(shorten, axis="index")
    return styler.to_html()
def update_tasks_component():
    """Return a visible "Tasks" radio selector twice, one per output slot.

    The choices are ``"All"`` followed by the values of ``TASKS``, with
    ``"All"`` preselected. Both tuple entries are the same ``gr.Radio``
    instance.
    """
    choices = ["All"] + list(TASKS.values())
    tasks_radio = gr.Radio(
        choices,
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        visible=True,
    )
    return tasks_radio, tasks_radio
def clear_results():
    """Return the reset value for every output component.

    Output order: model_id_1, model_id_2, dataframe_1, dataframe_2,
    load_results_btn, load_configs_btn, results_task, configs_task.

    The first four entries are ``None``, followed by a disabled "Load"
    button (same instance twice) and a hidden "Tasks" radio selector (same
    instance twice).
    """
    disabled_load_button = gr.Button("Load", interactive=False)
    hidden_tasks_radio = gr.Radio(
        ["All"] + list(TASKS.values()),
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        visible=False,
    )
    return (
        None,
        None,
        None,
        None,
        disabled_load_button,
        disabled_load_button,
        hidden_tasks_radio,
        hidden_tasks_radio,
    )
def highlight_min_max(s):
    """Return per-cell CSS that marks the best score in a metric row.

    Only rows whose label ends with ``"acc,none"``, ``"acc_norm,none"`` or
    ``"exact_match,none"`` are styled: cells equal to the row maximum are
    green and the remaining scored cells are red. Missing values are left
    unstyled instead of being painted red.

    Args:
        s: One table row as a pandas Series; ``s.name`` is the row label.

    Returns:
        Sequence of CSS strings with one entry per cell of ``s``.
    """
    if not s.name.endswith(("acc,none", "acc_norm,none", "exact_match,none")):
        return [""] * len(s)
    missing = s.isna().to_numpy()
    # With nothing to compare (empty or all-missing row) skip np.nanmax,
    # which would raise on empty input and warn on an all-NaN slice.
    if missing.all():
        return [""] * len(s)
    best = np.nanmax(s.values)
    styles = np.where(s == best, "background-color:green", "background-color:red")
    return np.where(missing, "", styles)
|