Spaces:

open-llm-leaderboard
/

comparator

Running

albertvillanova HF staff committed on Oct 15

Commit

d0f55c6

•

1 Parent(s): da4a3b1

Load results asynchronously

Files changed (2) hide show

src/hub.py ADDED Viewed

+import httpx
+from huggingface_hub import hf_hub_url
+client = httpx.AsyncClient()
+async def load_file(path):
+    url = to_url(path)
+    r = await client.get(url)
+    return r.json()
+def to_url(path):
+    _, org_name, ds_name, filename = path.split("/", 3)
+    return hf_hub_url(repo_id=f"{org_name}/{ds_name}", filename=filename, repo_type="dataset")

src/results.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import json
 import gradio as gr
 import numpy as np
@@ -6,6 +6,7 @@ import pandas as pd
 from huggingface_hub import HfFileSystem
 from src.constants import RESULTS_DATASET_ID, TASKS
 def fetch_result_paths():
@@ -28,25 +29,24 @@ def update_load_results_component():
     return (gr.Button("Load", interactive=True), ) * 2
-def load_results_dataframe(model_id, result_paths_per_model=None):
     if not model_id or not result_paths_per_model:
         return
     result_paths = result_paths_per_model[model_id]
-    fs = HfFileSystem()
     data = {"results": {}, "configs": {}}
-    for path in result_paths:
-        with fs.open(path, "r") as f:
-            d = json.load(f)
-        data["results"].update(d["results"])
-        data["configs"].update(d["configs"])
-        model_name = d.get("model_name", "Model")
     df = pd.json_normalize([data])
     # df.columns = df.columns.str.split(".")  # .split return a list instead of a tuple
     return df.set_index(pd.Index([model_name])).reset_index()
-def load_results_dataframes(*model_ids, result_paths_per_model=None):
-    return [load_results_dataframe(model_id, result_paths_per_model=result_paths_per_model) for model_id in model_ids]
 def display_results(task, *dfs):

+import asyncio
 import gradio as gr
 import numpy as np
 from huggingface_hub import HfFileSystem
 from src.constants import RESULTS_DATASET_ID, TASKS
+from src.hub import load_file
 def fetch_result_paths():
     return (gr.Button("Load", interactive=True), ) * 2
+async def load_results_dataframe(model_id, result_paths_per_model=None):
     if not model_id or not result_paths_per_model:
         return
     result_paths = result_paths_per_model[model_id]
+    results = await asyncio.gather(*[load_file(path) for path in result_paths])
     data = {"results": {}, "configs": {}}
+    for result in results:
+        data["results"].update(result["results"])
+        data["configs"].update(result["configs"])
+        model_name = result.get("model_name", "Model")
     df = pd.json_normalize([data])
     # df.columns = df.columns.str.split(".")  # .split return a list instead of a tuple
     return df.set_index(pd.Index([model_name])).reset_index()
+async def load_results_dataframes(*model_ids, result_paths_per_model=None):
+    result = await asyncio.gather(*[load_results_dataframe(model_id, result_paths_per_model) for model_id in model_ids])
+    return result
 def display_results(task, *dfs):