pminervini committed on
Commit
f21645c
1 Parent(s): 7e267bf
Files changed (2) hide show
  1. cli/halueval-cli.py +3 -1
  2. src/backend/envs.py +2 -0
cli/halueval-cli.py CHANGED
@@ -8,6 +8,8 @@ from src.backend.manage_requests import EvalRequest
8
  from src.backend.run_eval_suite import run_evaluation
9
 
10
  from src.backend.tasks.xsum.task import XSum
 
 
11
 
12
  from lm_eval.tasks import initialize_tasks, include_task_folder
13
  from lm_eval import tasks, evaluator, utils
@@ -31,7 +33,7 @@ def main():
31
  eval_request = [r for r in eval_requests if 'bloom-560m' in r.model][0]
32
 
33
  # my_task = Task("memo-trap", "acc", "memo-trap", 0)
34
- my_task = Task("xsum", "rougeLsum", "XSum", 2)
35
 
36
  TASKS_HARNESS = [my_task]
37
  # task_names = ['triviaqa']
 
8
  from src.backend.run_eval_suite import run_evaluation
9
 
10
  from src.backend.tasks.xsum.task import XSum
11
+ from src.backend.tasks.cnndm.task import CNNDM
12
+ from src.backend.tasks.selfcheckgpt.task import SelfCheckGpt
13
 
14
  from lm_eval.tasks import initialize_tasks, include_task_folder
15
  from lm_eval import tasks, evaluator, utils
 
33
  eval_request = [r for r in eval_requests if 'bloom-560m' in r.model][0]
34
 
35
  # my_task = Task("memo-trap", "acc", "memo-trap", 0)
36
+ my_task = Task("selfcheckgpt", "avg-selfcheckgpt", "SGPT", 2)
37
 
38
  TASKS_HARNESS = [my_task]
39
  # task_names = ['triviaqa']
src/backend/envs.py CHANGED
@@ -40,6 +40,8 @@ class Tasks(Enum):
40
  task11 = Task("nq8", "em", "NQ Open 8", 8)
41
  task12 = Task("tqa8", "em", "TriviaQA 8", 8)
42
 
 
 
43
  # NUM_FEWSHOT = 64 # Change with your few shot
44
 
45
 
 
40
  task11 = Task("nq8", "em", "NQ Open 8", 8)
41
  task12 = Task("tqa8", "em", "TriviaQA 8", 8)
42
 
43
+ task13 = Task("ifeval", "inst_level_strict_acc", "IFEval", 0)
44
+
45
  # NUM_FEWSHOT = 64 # Change with your few shot
46
 
47