Update scores
Browse files
- src/about.py +9 -8
- src/leaderboard/read_evals.py +2 -1
src/about.py
CHANGED
@@ -17,14 +17,15 @@ class Task:
|
|
17 |
# ---------------------------------------------------
|
18 |
class Tasks(Enum):
|
19 |
# task_key, metric_key, title
|
20 |
-
task00 = Task("naive_judge", "score", "NaïveJudge
|
21 |
-
task01 = Task("
|
22 |
-
task02 = Task("
|
23 |
-
task03 = Task("
|
24 |
-
task04 = Task("
|
25 |
-
task05 = Task("
|
26 |
-
task06 = Task("
|
27 |
-
task07 = Task("
|
|
|
28 |
# ---------------------------------------------------
|
29 |
|
30 |
# Your leaderboard name
|
|
|
17 |
# ---------------------------------------------------
|
18 |
class Tasks(Enum):
|
19 |
# task_key, metric_key, title
|
20 |
+
task00 = Task("naive_judge", "score", "NaïveJudge")
|
21 |
+
task01 = Task("human_eval_solidity_pass@1", "score", "HumanEval for Solidity (pass@1)")
|
22 |
+
task02 = Task("human_eval_solidity_pass@3", "score", "HumanEval for Solidity (pass@3)")
|
23 |
+
task03 = Task("rouge1", "score", "ROUGE-unigrams")
|
24 |
+
task04 = Task("rouge2", "score", "ROUGE-bigrams")
|
25 |
+
task05 = Task("rougeL", "score", "ROUGE-Longest Common Subsequence")
|
26 |
+
task06 = Task("rougeLsum", "score", "ROUGE-Lsum")
|
27 |
+
task07 = Task("bleu", "score", "Bleu")
|
28 |
+
task08 = Task("brevity_penalty", "score", "Brevity Penalty")
|
29 |
# ---------------------------------------------------
|
30 |
|
31 |
# Your leaderboard name
|
src/leaderboard/read_evals.py
CHANGED
@@ -117,7 +117,8 @@ class EvalResult:
|
|
117 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
118 |
solbench = (
|
119 |
self.results.get('naive_judge', 0) * 0.3 +
|
120 |
-
self.results.get('
|
|
|
121 |
)
|
122 |
data_dict = {
|
123 |
"eval_name": self.eval_name, # not a column, just a save name,
|
|
|
117 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
118 |
solbench = (
|
119 |
self.results.get('naive_judge', 0) * 0.3 +
|
120 |
+
self.results.get('human_eval_solidity_pass@1', 0) * 0.5 +
|
121 |
+
self.results.get('human_eval_solidity_pass@3', 0) * 0.2
|
122 |
)
|
123 |
data_dict = {
|
124 |
"eval_name": self.eval_name, # not a column, just a save name,
|