Fix solbench score calculation: use a weighted average over the non-zero task scores
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -121,11 +121,23 @@ class EvalResult:
|
|
121 |
def to_dict(self):
|
122 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
123 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
124 |
-
|
125 |
-
self.results.get('naive_judge', 0)
|
126 |
-
self.results.get('human_eval_solidity_pass@1', 0)
|
127 |
-
self.results.get('human_eval_solidity_pass@3', 0)
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
data_dict = {
|
130 |
"eval_name": self.eval_name, # not a column, just a save name,
|
131 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
121 |
def to_dict(self):
|
122 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
123 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
124 |
+
scores = {
|
125 |
+
'naive_judge': self.results.get('naive_judge', 0),
|
126 |
+
'human_eval_solidity_pass@1': self.results.get('human_eval_solidity_pass@1', 0),
|
127 |
+
'human_eval_solidity_pass@3': self.results.get('human_eval_solidity_pass@3', 0)
|
128 |
+
}
|
129 |
+
|
130 |
+
solbench = 0
|
131 |
+
non_zero_scores = {k: v for k, v in scores.items() if v != 0}
|
132 |
+
if non_zero_scores:
|
133 |
+
weights = {
|
134 |
+
'naive_judge': 0.3,
|
135 |
+
'human_eval_solidity_pass@1': 0.5,
|
136 |
+
'human_eval_solidity_pass@3': 0.2
|
137 |
+
}
|
138 |
+
total_weight = sum(weights[k] for k in non_zero_scores)
|
139 |
+
solbench = sum(scores[k] * weights[k] / total_weight for k in non_zero_scores)
|
140 |
+
|
141 |
data_dict = {
|
142 |
"eval_name": self.eval_name, # not a column, just a save name,
|
143 |
AutoEvalColumn.precision.name: self.precision.value.name,
|