brunneis committed on
Commit
8a3c7da
1 Parent(s): b66e0d8

Update HumanEval for Solidity scores

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +8 -8
src/leaderboard/read_evals.py CHANGED
@@ -42,11 +42,11 @@ class EvalResult:
42
  with open(json_filepath) as fp:
43
  data = json.load(fp)
44
 
45
- if 'human_eval_solidity_pass@1' not in data['results']:
46
- data['results']['human_eval_solidity_pass@1'] = {'score': 0}
47
 
48
- if 'human_eval_solidity_pass@3' not in data['results']:
49
- data['results']['human_eval_solidity_pass@3'] = {'score': 0}
50
 
51
  org, model = get_org_and_model_names_from_filepath(json_filepath)
52
  config = data.get("config")
@@ -123,8 +123,8 @@ class EvalResult:
123
  # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
124
  scores = {
125
  'naive_judge': self.results.get('naive_judge', 0),
126
- 'human_eval_solidity_pass@1': self.results.get('human_eval_solidity_pass@1', 0),
127
- 'human_eval_solidity_pass@3': self.results.get('human_eval_solidity_pass@3', 0)
128
  }
129
 
130
  solbench = 0
@@ -132,8 +132,8 @@ class EvalResult:
132
  if non_zero_scores:
133
  weights = {
134
  'naive_judge': 0.3,
135
- 'human_eval_solidity_pass@1': 0.5,
136
- 'human_eval_solidity_pass@3': 0.2
137
  }
138
  total_weight = sum(weights[k] for k in non_zero_scores)
139
  solbench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
 
42
  with open(json_filepath) as fp:
43
  data = json.load(fp)
44
 
45
+ if 'human_eval_solidity_pass_1' not in data['results']:
46
+ data['results']['human_eval_solidity_pass_1'] = {'score': 0}
47
 
48
+ if 'human_eval_solidity_pass_3' not in data['results']:
49
+ data['results']['human_eval_solidity_pass_3'] = {'score': 0}
50
 
51
  org, model = get_org_and_model_names_from_filepath(json_filepath)
52
  config = data.get("config")
 
123
  # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
124
  scores = {
125
  'naive_judge': self.results.get('naive_judge', 0),
126
+ 'human_eval_solidity_pass_1': self.results.get('human_eval_solidity_pass_1', 0),
127
+ 'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
128
  }
129
 
130
  solbench = 0
 
132
  if non_zero_scores:
133
  weights = {
134
  'naive_judge': 0.3,
135
+ 'human_eval_solidity_pass_1': 0.5,
136
+ 'human_eval_solidity_pass_3': 0.2
137
  }
138
  total_weight = sum(weights[k] for k in non_zero_scores)
139
  solbench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)