pminervini committed • Commit 3be882c • Parent(s): 196121f

update
src/backend/run_eval_suite.py
CHANGED
@@ -1,5 +1,5 @@
 from lm_eval import tasks, evaluator, utils
-from lm_eval.tasks import
+from lm_eval.tasks import TaskManager
 
 from src.backend.manage_requests import EvalRequest
 
@@ -16,13 +16,16 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
     if limit:
         print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 
-    include_task_folder("src/backend/tasks/")
-    initialize_tasks('INFO')
+    # include_task_folder("src/backend/tasks/")
+    # initialize_tasks('INFO')
+
+    task_manager = TaskManager(include_path="./src/backend/tasks/")
+    # task_manager.initialize_tasks('INFO')
 
     print(f"Considered Tasks: {task_names}")
-    print(f"Allowed Tasks: {tasks.ALL_TASKS}")
+    # print(f"Allowed Tasks: {tasks.ALL_TASKS}")
 
-    task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
+    # task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
 
     print(f"Selected Tasks: {task_names}")
     print(f"Eval Request: {eval_request.get_model_args()}")
 
@@ -36,7 +39,8 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
         device=device,
         use_cache=use_cache,
         limit=limit,
-        write_out=True
+        write_out=True,
+        task_manager=task_manager)
 
     results["config"]["model_dtype"] = eval_request.precision
     results["config"]["model_name"] = eval_request.model