Fix missing scores
- src/leaderboard/read_evals.py +6 -0
- src/populate.py +0 -54
src/leaderboard/read_evals.py CHANGED
@@ -42,6 +42,12 @@ class EvalResult:
         with open(json_filepath) as fp:
             data = json.load(fp)

+        if 'human_eval_solidity_pass@1' not in data['results']:
+            data['results']['human_eval_solidity_pass@1'] = {'score': 0}
+
+        if 'human_eval_solidity_pass@3' not in data['results']:
+            data['results']['human_eval_solidity_pass@3'] = {'score': 0}
+
         org, model = get_org_and_model_names_from_filepath(json_filepath)
         config = data.get("config")

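For context, a minimal sketch of what the added guard does, using made-up result data (the results payload below is illustrative, not taken from an actual leaderboard file): if a result JSON lacks either Solidity pass@k entry, it is defaulted to a score of 0 instead of raising a KeyError later while building the EvalResult.

    # Illustrative only: a results payload missing both Solidity pass@k entries.
    data = {"results": {"some_other_metric": {"score": 0.42}}}

    # Same defaulting logic as the patch above.
    if 'human_eval_solidity_pass@1' not in data['results']:
        data['results']['human_eval_solidity_pass@1'] = {'score': 0}
    if 'human_eval_solidity_pass@3' not in data['results']:
        data['results']['human_eval_solidity_pass@3'] = {'score': 0}

    print(data['results']['human_eval_solidity_pass@1'])  # {'score': 0}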
src/populate.py CHANGED
@@ -28,59 +28,6 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     return df


-# def get_evaluation_requests_df(save_path: str, cols: list) -> list[pd.DataFrame]:
-#     """Creates the different dataframes for the evaluation requestss requestes"""
-#     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
-#     all_evals = []
-
-#     for entry in entries:
-#         if ".json" in entry:
-#             file_path = os.path.join(save_path, entry)
-#             try:
-#                 with open(file_path, encoding='utf-8') as fp:
-#                     data = json.load(fp)
-#             except UnicodeDecodeError as e:
-#                 print(f"Unicode decoding error in {file_path}: {e}")
-#                 continue
-
-#             # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-#             model_name = get_model_name_from_filepath(file_path)
-#             data[EvalQueueColumn.model.name] = make_clickable_model(model_name)
-
-#             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-
-#             all_evals.append(data)
-
-#         elif ".md" not in entry:
-#             # this is a folder
-#             sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
-#             for sub_entry in sub_entries:
-#                 file_path = os.path.join(save_path, entry, sub_entry)
-#                 try:
-#                     with open(file_path, encoding='utf-8') as fp:
-#                         data = json.load(fp)
-#                 except json.JSONDecodeError:
-#                     print(f"Error reading {file_path}")
-#                     continue
-#                 except UnicodeDecodeError as e:
-#                     print(f"Unicode decoding error in {file_path}: {e}")
-#                     continue
-
-#                 # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-#                 model_name = get_model_name_from_filepath(file_path)
-#                 data[EvalQueueColumn.model.name] = make_clickable_model(model_name)
-
-#                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-#                 all_evals.append(data)
-
-#     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
-#     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
-#     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
-#     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
-#     df_running = pd.DataFrame.from_records(running_list, columns=cols)
-#     df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
-#     return df_finished[cols], df_running[cols], df_pending[cols]
-
 def get_evaluation_requests_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation requestss requested."""
     all_evals = []

@@ -94,7 +41,6 @@ def get_evaluation_requests_df(save_path: str, cols: list) -> list[pd.DataFrame]
             return None

     model_name = get_model_name_from_filepath(file_path)
-    # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
     data[EvalQueueColumn.model.name] = make_clickable_model(model_name)
     data[EvalQueueColumn.revision.name] = data.get("revision", "main")
     return data