Commit f21ebe8 · 1 Parent(s): a6cd6c8
Aaron Mueller committed
change cols

Changed files:
- src/about.py                   +1 -2
- src/leaderboard/read_evals.py  +0 -9
- src/populate.py                +2 -1
src/about.py
CHANGED
@@ -43,7 +43,6 @@ The leaderboards for each track of the 2024 BabyLM Challenge.
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 This leaderboard accepts predictions files as input, and uploads the results to the leaderboard. The logic is the same as in the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository.
-
 """
 
 EVALUATION_QUEUE_TEXT = """
@@ -66,4 +65,4 @@ When we add extra information about models to the leaderboard, it will be automa
 
 CITATION_BUTTON_LABEL = "If you would like to cite these results, please cite the 2024 BabyLM Findings paper, as well as the authors of the model(s) whose results you cite!"
 CITATION_BUTTON_TEXT = r"""
-"""
+"""
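Note: the "How it works" text above says the Space scores uploaded predictions files with the same logic as the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository. As a rough, hypothetical illustration of that kind of scoring (not the actual repository code; the {example_id: label} JSON layout is an assumption made here for illustration):

import json

def toy_score_predictions(predictions_path: str, gold_path: str) -> float:
    # Toy accuracy scorer. The JSON layout ({example_id: label}) is an
    # assumption for illustration, not the real BabyLM predictions format.
    with open(predictions_path) as f:
        preds = json.load(f)
    with open(gold_path) as f:
        gold = json.load(f)
    # Exact-match accuracy over the gold examples; missing predictions count as wrong.
    correct = sum(1 for ex_id, label in gold.items() if preds.get(ex_id) == label)
    return correct / len(gold)
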
src/leaderboard/read_evals.py
CHANGED
@@ -165,12 +165,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print("Here 1")
-        print(eval_result)
-        print(requests_path)
         eval_result.update_with_request_file(requests_path)
-        print("Here 2")
-        print(eval_result)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
@@ -181,12 +176,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
     results = []
     for v in eval_results.values():
-        print ("Here 3")
-        print(v)
         try:
             v.to_dict() # we test if the dict version is complete
-            print("Here 4")
-            print(v)
             results.append(v)
         except KeyError:  # not all eval values present
             continue
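Note on the hunk above: entries whose `to_dict()` raises a KeyError (i.e. not all eval values are present yet) are skipped, and this commit removes the debug prints around that check. A self-contained sketch of the same filter-incomplete-results idiom, using a toy stand-in for EvalResult (the benchmark keys are hypothetical):

from dataclasses import dataclass

@dataclass
class ToyEvalResult:
    eval_name: str
    results: dict  # benchmark name -> score

    REQUIRED = ("blimp", "ewok")  # hypothetical benchmark keys

    def to_dict(self) -> dict:
        # Raises KeyError when a required score is missing, mirroring how an
        # incomplete EvalResult fails inside get_raw_eval_results.
        return {"eval_name": self.eval_name,
                **{k: self.results[k] for k in self.REQUIRED}}

complete = ToyEvalResult("model-a", {"blimp": 71.2, "ewok": 55.0})
partial = ToyEvalResult("model-b", {"blimp": 68.9})

kept = []
for v in (complete, partial):
    try:
        v.to_dict()  # test that the dict version is complete
        kept.append(v)
    except KeyError:  # not all eval values present
        continue

print([v.eval_name for v in kept])  # -> ['model-a']
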
src/populate.py
CHANGED
@@ -11,10 +11,11 @@ from src.leaderboard.read_evals import get_raw_eval_results
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
-    print(raw_data)
     all_data_json = [v.to_dict() for v in raw_data]
 
+    print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
+    print(df)
     # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=1)
 
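Note: for reference, `get_leaderboard_df` (signature shown in the hunk above) builds a pandas DataFrame from the raw eval results, keeps only the columns in `cols`, and rounds scores to one decimal. A hedged usage sketch; the paths and column names below are placeholders, not the Space's real configuration:

import pandas as pd

from src.populate import get_leaderboard_df

# Placeholder locations and columns -- the real values live in the Space's
# configuration, not in this commit.
EVAL_RESULTS_PATH = "./eval-results"
EVAL_REQUESTS_PATH = "./eval-queue"
COLS = ["Model", "BLiMP", "EWoK", "Average"]
BENCHMARK_COLS = ["BLiMP", "EWoK"]

df: pd.DataFrame = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
print(df.head())  # one row per submission, scores rounded to 1 decimal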