brunneis committed on
Commit
5a908b8
1 Parent(s): 72f97c2

Add Solbench score

Browse files
src/display/utils.py CHANGED
@@ -38,7 +38,8 @@ auto_eval_column_dict = [
38
  "", "str", True, never_hidden=True)),
39
  ("model", ColumnContent, create_column_content(
40
  "Model", "markdown", True, never_hidden=True)),
41
- ("average", ColumnContent, create_column_content("Average", "number", True)),
 
42
  ]
43
 
44
  # Add task-specific columns
 
38
  "", "str", True, never_hidden=True)),
39
  ("model", ColumnContent, create_column_content(
40
  "Model", "markdown", True, never_hidden=True)),
41
+ ("solbench", ColumnContent, create_column_content("Score", "number", True)),
42
+ # ("average", ColumnContent, create_column_content("Average", "number", True)),
43
  ]
44
 
45
  # Add task-specific columns
src/leaderboard/read_evals.py CHANGED
@@ -114,7 +114,11 @@ class EvalResult:
114
 
115
  def to_dict(self):
116
  """Converts the Eval Result to a dict compatible with our dataframe display"""
117
- average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
 
 
 
 
118
  data_dict = {
119
  "eval_name": self.eval_name, # not a column, just a save name,
120
  AutoEvalColumn.precision.name: self.precision.value.name,
@@ -124,7 +128,8 @@ class EvalResult:
124
  AutoEvalColumn.architecture.name: self.architecture,
125
  AutoEvalColumn.model.name: make_clickable_model(self.model_name),
126
  AutoEvalColumn.revision.name: self.revision,
127
- AutoEvalColumn.average.name: average,
 
128
  AutoEvalColumn.license.name: self.license,
129
  AutoEvalColumn.likes.name: self.likes,
130
  AutoEvalColumn.params.name: self.num_params,
 
114
 
115
  def to_dict(self):
116
  """Converts the Eval Result to a dict compatible with our dataframe display"""
117
+ # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
118
+ solbench = sum([
119
+ self.results.get('naive_judge', 0),
120
+ self.results.get('human_eval_solidity', 0)
121
+ ]) / 2
122
  data_dict = {
123
  "eval_name": self.eval_name, # not a column, just a save name,
124
  AutoEvalColumn.precision.name: self.precision.value.name,
 
128
  AutoEvalColumn.architecture.name: self.architecture,
129
  AutoEvalColumn.model.name: make_clickable_model(self.model_name),
130
  AutoEvalColumn.revision.name: self.revision,
131
+ # AutoEvalColumn.average.name: average,
132
+ AutoEvalColumn.solbench.name: solbench,
133
  AutoEvalColumn.license.name: self.license,
134
  AutoEvalColumn.likes.name: self.likes,
135
  AutoEvalColumn.params.name: self.num_params,
src/populate.py CHANGED
@@ -19,7 +19,8 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
19
  all_data_json = [v.to_dict() for v in raw_data]
20
 
21
  df = pd.DataFrame.from_records(all_data_json)
22
- df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
 
23
  df = df[cols].round(decimals=2)
24
 
25
  # filter out if any of the benchmarks have not been produced
 
19
  all_data_json = [v.to_dict() for v in raw_data]
20
 
21
  df = pd.DataFrame.from_records(all_data_json)
22
+ # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
23
+ df = df.sort_values(by=[AutoEvalColumn.solbench.name], ascending=False)
24
  df = df[cols].round(decimals=2)
25
 
26
  # filter out if any of the benchmarks have not been produced