Commit f21ebe8 · Aaron Mueller committed · 1 Parent(s): a6cd6c8

change cols

src/about.py CHANGED
@@ -43,7 +43,6 @@ The leaderboards for each track of the 2024 BabyLM Challenge.
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 This leaderboard accepts predictions files as input, and uploads the results to the leaderboard. The logic is the same as in the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository.
-
 """
 
 EVALUATION_QUEUE_TEXT = """
@@ -66,4 +65,4 @@ When we add extra information about models to the leaderboard, it will be automa
 
 CITATION_BUTTON_LABEL = "If you would like to cite these results, please cite the 2024 BabyLM Findings paper, as well as the authors of the model(s) whose results you cite!"
 CITATION_BUTTON_TEXT = r"""
- """
+ """
src/leaderboard/read_evals.py CHANGED
@@ -165,12 +165,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print("Here 1")
-        print(eval_result)
-        print(requests_path)
         eval_result.update_with_request_file(requests_path)
-        print("Here 2")
-        print(eval_result)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
@@ -181,12 +176,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
 
     results = []
     for v in eval_results.values():
-        print ("Here 3")
-        print(v)
         try:
             v.to_dict() # we test if the dict version is complete
-            print("Here 4")
-            print(v)
             results.append(v)
         except KeyError: # not all eval values present
             continue
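
The retained logic keeps only evals whose dict conversion is complete: to_dict() is attempted and any entry that raises KeyError is skipped. Below is a self-contained sketch of that pattern, using a stand-in class rather than the real EvalResult from this repository.

    # Sketch of the keep-only-complete-results filter shown above.
    # StubEvalResult is a stand-in; the real EvalResult is defined in src/leaderboard/read_evals.py.
    from dataclasses import dataclass, field


    @dataclass
    class StubEvalResult:
        eval_name: str
        results: dict = field(default_factory=dict)

        def to_dict(self) -> dict:
            # Raises KeyError when an expected score is missing, which is how
            # an incomplete eval is detected in the loop above.
            return {"eval_name": self.eval_name, "score": self.results["score"]}


    eval_results = {
        "model_a": StubEvalResult("model_a", {"score": 81.2}),
        "model_b": StubEvalResult("model_b", {}),  # incomplete: no score yet
    }

    results = []
    for v in eval_results.values():
        try:
            v.to_dict()  # we test if the dict version is complete
            results.append(v)
        except KeyError:  # not all eval values present
            continue

    print([r.eval_name for r in results])  # prints ['model_a']
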
src/populate.py CHANGED
@@ -11,10 +11,11 @@ from src.leaderboard.read_evals import get_raw_eval_results
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
-    print(raw_data)
     all_data_json = [v.to_dict() for v in raw_data]
 
+    print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
+    print(df)
     # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=1)
 
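
The flow that the two new print calls expose is: a list of per-model dicts is turned into a DataFrame with pandas.DataFrame.from_records, then restricted to the requested columns and rounded. A runnable sketch with dummy records follows; the column names are illustrative, not the leaderboard's actual cols.

    # Sketch of the DataFrame-building flow in get_leaderboard_df, with dummy records.
    # The keys "model", "blimp", and "ewok" are illustrative stand-ins for the real columns.
    import pandas as pd

    # Stand-in for all_data_json = [v.to_dict() for v in raw_data]
    all_data_json = [
        {"model": "baby-lm-10M", "blimp": 75.432, "ewok": 62.918},
        {"model": "baby-lm-100M", "blimp": 81.207, "ewok": 68.554},
    ]
    print(all_data_json)  # first new debug print: the raw list of dicts

    df = pd.DataFrame.from_records(all_data_json)
    print(df)  # second new debug print: the unfiltered DataFrame

    # Column selection and rounding, as in populate.py; round() leaves the
    # non-numeric "model" column untouched.
    cols = ["model", "blimp", "ewok"]
    df = df[cols].round(decimals=1)
    print(df)
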