Commit f21ebe8 · Aaron Mueller committed · 1 Parent(s): a6cd6c8

change cols

src/about.py CHANGED
@@ -43,7 +43,6 @@ The leaderboards for each track of the 2024 BabyLM Challenge.
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 This leaderboard accepts predictions files as input, and uploads the results to the leaderboard. The logic is the same as in the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository.
-
 """
 
 EVALUATION_QUEUE_TEXT = """
@@ -66,4 +65,4 @@ When we add extra information about models to the leaderboard, it will be automa
 
 CITATION_BUTTON_LABEL = "If you would like to cite these results, please cite the 2024 BabyLM Findings paper, as well as the authors of the model(s) whose results you cite!"
 CITATION_BUTTON_TEXT = r"""
- """
+ """
src/leaderboard/read_evals.py CHANGED
@@ -165,12 +165,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print("Here 1")
-        print(eval_result)
-        print(requests_path)
         eval_result.update_with_request_file(requests_path)
-        print("Here 2")
-        print(eval_result)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
@@ -181,12 +176,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
 
     results = []
     for v in eval_results.values():
-        print ("Here 3")
-        print(v)
         try:
             v.to_dict() # we test if the dict version is complete
-            print("Here 4")
-            print(v)
             results.append(v)
         except KeyError: # not all eval values present
             continue
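
The retained logic keeps only evals whose dict conversion is complete: to_dict() is attempted and any entry that raises KeyError is skipped. Below is a self-contained sketch of that pattern, using a stand-in class rather than the real EvalResult from this repository.

    # Sketch of the keep-only-complete-results filter shown above.
    # StubEvalResult is a stand-in; the real EvalResult is defined in src/leaderboard/read_evals.py.
    from dataclasses import dataclass, field


    @dataclass
    class StubEvalResult:
        eval_name: str
        results: dict = field(default_factory=dict)

        def to_dict(self) -> dict:
            # Raises KeyError when an expected score is missing, which is how
            # an incomplete eval is detected in the loop above.
            return {"eval_name": self.eval_name, "score": self.results["score"]}


    eval_results = {
        "model_a": StubEvalResult("model_a", {"score": 81.2}),
        "model_b": StubEvalResult("model_b", {}),  # incomplete: no score yet
    }

    results = []
    for v in eval_results.values():
        try:
            v.to_dict()  # we test if the dict version is complete
            results.append(v)
        except KeyError:  # not all eval values present
            continue

    print([r.eval_name for r in results])  # prints ['model_a']
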
src/populate.py CHANGED
@@ -11,10 +11,11 @@ from src.leaderboard.read_evals import get_raw_eval_results
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
-    print(raw_data)
     all_data_json = [v.to_dict() for v in raw_data]
 
+    print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
+    print(df)
     # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=1)
 
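
The flow that the two new print calls expose is: a list of per-model dicts is turned into a DataFrame with pandas.DataFrame.from_records, then restricted to the requested columns and rounded. A runnable sketch with dummy records follows; the column names are illustrative, not the leaderboard's actual cols.

    # Sketch of the DataFrame-building flow in get_leaderboard_df, with dummy records.
    # The keys "model", "blimp", and "ewok" are illustrative stand-ins for the real columns.
    import pandas as pd

    # Stand-in for all_data_json = [v.to_dict() for v in raw_data]
    all_data_json = [
        {"model": "baby-lm-10M", "blimp": 75.432, "ewok": 62.918},
        {"model": "baby-lm-100M", "blimp": 81.207, "ewok": 68.554},
    ]
    print(all_data_json)  # first new debug print: the raw list of dicts

    df = pd.DataFrame.from_records(all_data_json)
    print(df)  # second new debug print: the unfiltered DataFrame

    # Column selection and rounding, as in populate.py; round() leaves the
    # non-numeric "model" column untouched.
    cols = ["model", "blimp", "ewok"]
    df = df[cols].round(decimals=1)
    print(df)
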