brunneis committed on
Commit
308e87c
1 Parent(s): e3bcf20

Fix solbench score calc

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +17 -5
src/leaderboard/read_evals.py CHANGED
@@ -121,11 +121,23 @@ class EvalResult:
121
  def to_dict(self):
122
  """Converts the Eval Result to a dict compatible with our dataframe display"""
123
  # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
124
- solbench = (
125
- self.results.get('naive_judge', 0) * 0.3 +
126
- self.results.get('human_eval_solidity_pass@1', 0) * 0.5 +
127
- self.results.get('human_eval_solidity_pass@3', 0) * 0.2
128
- )
 
 
 
 
 
 
 
 
 
 
 
 
129
  data_dict = {
130
  "eval_name": self.eval_name, # not a column, just a save name,
131
  AutoEvalColumn.precision.name: self.precision.value.name,
 
121
  def to_dict(self):
122
  """Converts the Eval Result to a dict compatible with our dataframe display"""
123
  # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
124
+ scores = {
125
+ 'naive_judge': self.results.get('naive_judge', 0),
126
+ 'human_eval_solidity_pass@1': self.results.get('human_eval_solidity_pass@1', 0),
127
+ 'human_eval_solidity_pass@3': self.results.get('human_eval_solidity_pass@3', 0)
128
+ }
129
+
130
+ solbench = 0
131
+ non_zero_scores = {k: v for k, v in scores.items() if v != 0}
132
+ if non_zero_scores:
133
+ weights = {
134
+ 'naive_judge': 0.3,
135
+ 'human_eval_solidity_pass@1': 0.5,
136
+ 'human_eval_solidity_pass@3': 0.2
137
+ }
138
+ total_weight = sum(weights[k] for k in non_zero_scores)
139
+ solbench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
140
+
141
  data_dict = {
142
  "eval_name": self.eval_name, # not a column, just a save name,
143
  AutoEvalColumn.precision.name: self.precision.value.name,