Spaces:
Runtime error
Runtime error
pminervini
committed on
Commit
•
bcdca08
1
Parent(s):
f41876f
update
Browse files
- cli/halueval-cli.py +1 -1
- src/backend/envs.py +6 -3
cli/halueval-cli.py
CHANGED
@@ -42,7 +42,7 @@ def main():
|
|
42 |
for task in TASKS_HARNESS:
|
43 |
print(f"Selected Tasks: [{task}]")
|
44 |
results = evaluator.simple_evaluate(model="hf", model_args=eval_request.get_model_args(), tasks=[task.benchmark], num_fewshot=1,
|
45 |
-
batch_size=1, device="mps", use_cache=None, limit=
|
46 |
print('AAA', results["results"])
|
47 |
|
48 |
breakpoint()
|
|
|
42 |
for task in TASKS_HARNESS:
|
43 |
print(f"Selected Tasks: [{task}]")
|
44 |
results = evaluator.simple_evaluate(model="hf", model_args=eval_request.get_model_args(), tasks=[task.benchmark], num_fewshot=1,
|
45 |
+
batch_size=1, device="mps", use_cache=None, limit=10, write_out=True)
|
46 |
print('AAA', results["results"])
|
47 |
|
48 |
breakpoint()
|
src/backend/envs.py
CHANGED
@@ -22,9 +22,12 @@ class Tasks(Enum):
|
|
22 |
# task1 = Task("logiqa", "acc_norm", "LogiQA")
|
23 |
task0 = Task("nq_open", "em", "NQ Open", 64) # 64, as in the ATLAS paper
|
24 |
task1 = Task("triviaqa", "em", "TriviaQA", 64) # 64, as in the ATLAS paper
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
28 |
|
29 |
# NUM_FEWSHOT = 64 # Change with your few shot
|
30 |
|
|
|
22 |
# task1 = Task("logiqa", "acc_norm", "LogiQA")
|
23 |
task0 = Task("nq_open", "em", "NQ Open", 64) # 64, as in the ATLAS paper
|
24 |
task1 = Task("triviaqa", "em", "TriviaQA", 64) # 64, as in the ATLAS paper
|
25 |
+
# TruthfulQA is intended as a zero-shot benchmark [5, 47]. https://owainevans.github.io/pdfs/truthfulQA_lin_evans.pdf
|
26 |
+
# task2 = Task("truthfulqa_gen", "rougeL_acc", "TruthfulQA Gen", 0)
|
27 |
+
task3 = Task("truthfulqa_mc1", "acc", "TruthfulQA MC1", 0)
|
28 |
+
task4 = Task("truthfulqa_mc2", "acc", "TruthfulQA MC2", 0)
|
29 |
+
task5 = Task("halueval_qa", "acc", "HaluEval QA", 0)
|
30 |
+
# task6 = Task("xsum", "rougeL_acc", "XSum", 8)
|
31 |
|
32 |
# NUM_FEWSHOT = 64 # Change with your few shot
|
33 |
|