Commit b774671
Parent(s): 03af4bc
Add multithread

app.py CHANGED
@@ -8,6 +8,8 @@ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
 from huggingface_hub.repocard import metadata_load
 from apscheduler.schedulers.background import BackgroundScheduler
 
+from tqdm.contrib.concurrent import thread_map
+
 from utils import *
 
 DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface-projects/drlc-leaderboard-data"
@@ -196,6 +198,42 @@ def get_model_ids(rl_env):
     model_ids = [x.modelId for x in models]
     return model_ids
 
+# Parallelized version
+def update_leaderboard_dataset_parallel(rl_env, path):
+    # Get model ids associated with rl_env
+    model_ids = get_model_ids(rl_env)
+
+    def process_model(model_id):
+        meta = get_metadata(model_id)
+        #LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
+        if meta is None:
+            return None
+        user_id = model_id.split('/')[0]
+        row = {}
+        row["User"] = user_id
+        row["Model"] = model_id
+        accuracy = parse_metrics_accuracy(meta)
+        mean_reward, std_reward = parse_rewards(accuracy)
+        mean_reward = mean_reward if not pd.isna(mean_reward) else 0
+        std_reward = std_reward if not pd.isna(std_reward) else 0
+        row["Results"] = mean_reward - std_reward
+        row["Mean Reward"] = mean_reward
+        row["Std Reward"] = std_reward
+        return row
+
+    data = list(thread_map(process_model, model_ids, desc="Processing models"))
+
+    # Filter out None results (models with no metadata)
+    data = [row for row in data if row is not None]
+
+    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
+    new_history = ranked_dataframe
+    file_path = path + "/" + rl_env + ".csv"
+    new_history.to_csv(file_path, index=False)
+
+    return ranked_dataframe
+
+
 def update_leaderboard_dataset(rl_env, path):
     # Get model ids associated with rl_env
     model_ids = get_model_ids(rl_env)
@@ -272,7 +310,7 @@ def run_update_dataset():
     path_ = download_leaderboard_dataset()
     for i in range(0, len(rl_envs)):
         rl_env = rl_envs[i]
-        update_leaderboard_dataset(rl_env["rl_env"], path_)
+        update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)
 
     api.upload_folder(
         folder_path=path_,