Commit b774671
Parent(s): 03af4bc
Add multithread

app.py CHANGED
@@ -8,6 +8,8 @@ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
 from huggingface_hub.repocard import metadata_load
 from apscheduler.schedulers.background import BackgroundScheduler
 
+from tqdm.contrib.concurrent import thread_map
+
 from utils import *
 
 DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface-projects/drlc-leaderboard-data"
@@ -196,6 +198,42 @@ def get_model_ids(rl_env):
     model_ids = [x.modelId for x in models]
     return model_ids
 
+# Parallelized version
+def update_leaderboard_dataset_parallel(rl_env, path):
+    # Get model ids associated with rl_env
+    model_ids = get_model_ids(rl_env)
+
+    def process_model(model_id):
+        meta = get_metadata(model_id)
+        #LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
+        if meta is None:
+            return None
+        user_id = model_id.split('/')[0]
+        row = {}
+        row["User"] = user_id
+        row["Model"] = model_id
+        accuracy = parse_metrics_accuracy(meta)
+        mean_reward, std_reward = parse_rewards(accuracy)
+        mean_reward = mean_reward if not pd.isna(mean_reward) else 0
+        std_reward = std_reward if not pd.isna(std_reward) else 0
+        row["Results"] = mean_reward - std_reward
+        row["Mean Reward"] = mean_reward
+        row["Std Reward"] = std_reward
+        return row
+
+    data = list(thread_map(process_model, model_ids, desc="Processing models"))
+
+    # Filter out None results (models with no metadata)
+    data = [row for row in data if row is not None]
+
+    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
+    new_history = ranked_dataframe
+    file_path = path + "/" + rl_env + ".csv"
+    new_history.to_csv(file_path, index=False)
+
+    return ranked_dataframe
+
+
 def update_leaderboard_dataset(rl_env, path):
     # Get model ids associated with rl_env
     model_ids = get_model_ids(rl_env)
@@ -272,7 +310,7 @@ def run_update_dataset():
     path_ = download_leaderboard_dataset()
     for i in range(0, len(rl_envs)):
         rl_env = rl_envs[i]
-        update_leaderboard_dataset(rl_env["rl_env"], path_)
+        update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)
 
     api.upload_folder(
         folder_path=path_,