Update HumanEval for Solidity scores
src/leaderboard/read_evals.py (changed)
@@ -42,11 +42,11 @@ class EvalResult:
         with open(json_filepath) as fp:
             data = json.load(fp)

-        if '
-            data['results']['
+        if 'human_eval_solidity_pass_1' not in data['results']:
+            data['results']['human_eval_solidity_pass_1'] = {'score': 0}

-        if '
-            data['results']['
+        if 'human_eval_solidity_pass_3' not in data['results']:
+            data['results']['human_eval_solidity_pass_3'] = {'score': 0}

         org, model = get_org_and_model_names_from_filepath(json_filepath)
         config = data.get("config")
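In effect, the two new guards backfill result files that predate the HumanEval for Solidity runs: any missing entry is defaulted to a zero score, so the file still loads and the model simply gets no credit for that task. A minimal, self-contained sketch of that behaviour (the JSON content below is invented for illustration):

data = {'results': {'naive_judge': {'score': 0.8}}}  # older file without Solidity entries

for key in ('human_eval_solidity_pass_1', 'human_eval_solidity_pass_3'):
    if key not in data['results']:
        data['results'][key] = {'score': 0}  # a missing run counts as a zero score

# Both keys now always exist, so the scoring code below can read them unconditionally.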
@@ -123,8 +123,8 @@ class EvalResult:
         # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         scores = {
             'naive_judge': self.results.get('naive_judge', 0),
-            '
-            '
+            'human_eval_solidity_pass_1': self.results.get('human_eval_solidity_pass_1', 0),
+            'human_eval_solidity_pass_3': self.results.get('human_eval_solidity_pass_3', 0)
         }

         solbench = 0
@@ -132,8 +132,8 @@ class EvalResult:
         if non_zero_scores:
             weights = {
                 'naive_judge': 0.3,
-                '
-                '
+                'human_eval_solidity_pass_1': 0.5,
+                'human_eval_solidity_pass_3': 0.2
             }
             total_weight = sum(weights[k] for k in non_zero_scores)
             solbench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
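Taken together, the scoring change weights naive_judge at 0.3, HumanEval for Solidity pass@1 at 0.5 and pass@3 at 0.2, and renormalises the weights over whichever scores are non-zero, so a task with no result does not drag the aggregate down. A minimal sketch of that calculation, assuming non_zero_scores is simply the set of tasks with a non-zero score (in read_evals.py it is computed just above this hunk, outside the diff); the helper name compute_solbench and the example values are invented:

def compute_solbench(scores):
    weights = {
        'naive_judge': 0.3,
        'human_eval_solidity_pass_1': 0.5,
        'human_eval_solidity_pass_3': 0.2,
    }
    non_zero_scores = [k for k, v in scores.items() if v]  # tasks that actually have a score
    if not non_zero_scores:
        return 0
    # Renormalise the weights of the present tasks so they sum to 1 again.
    total_weight = sum(weights[k] for k in non_zero_scores)
    return sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)

# Example: a model with no pass@3 result is scored on the remaining two tasks only.
print(compute_solbench({
    'naive_judge': 0.8,
    'human_eval_solidity_pass_1': 0.6,
    'human_eval_solidity_pass_3': 0,
}))  # 0.8 * (0.3 / 0.8) + 0.6 * (0.5 / 0.8) = 0.675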