Spaces:
Paused
Paused
Updated run eval suite
Browse files
src/backend/run_eval_suite_lighteval.py
CHANGED
@@ -2,6 +2,7 @@ import json
|
|
2 |
import os
|
3 |
import logging
|
4 |
from datetime import datetime
|
|
|
5 |
|
6 |
from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
|
7 |
|
@@ -14,7 +15,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
|
|
14 |
if limit:
|
15 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
16 |
|
17 |
-
args = {
|
18 |
"endpoint_model_name":f"{eval_request.model}_{eval_request.precision}".lower(),
|
19 |
"accelerator": accelerator,
|
20 |
"vendor": vendor,
|
@@ -33,7 +34,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
|
|
33 |
"override_batch_size": batch_size,
|
34 |
"custom_tasks": "custom_tasks.py",
|
35 |
"tasks": task_names
|
36 |
-
}
|
37 |
|
38 |
try:
|
39 |
results = main(args)
|
|
|
2 |
import os
|
3 |
import logging
|
4 |
from datetime import datetime
|
5 |
+
from argparse import Namespace
|
6 |
|
7 |
from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
|
8 |
|
|
|
15 |
if limit:
|
16 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
17 |
|
18 |
+
args = Namespace(**{
|
19 |
"endpoint_model_name":f"{eval_request.model}_{eval_request.precision}".lower(),
|
20 |
"accelerator": accelerator,
|
21 |
"vendor": vendor,
|
|
|
34 |
"override_batch_size": batch_size,
|
35 |
"custom_tasks": "custom_tasks.py",
|
36 |
"tasks": task_names
|
37 |
+
})
|
38 |
|
39 |
try:
|
40 |
results = main(args)
|