Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
·
1109e5f
1
Parent(s):
23681e2
update
Browse files- fix-requests-cli.py +31 -0
- src/backend/run_eval_suite.py +3 -1
fix-requests-cli.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
|
3 |
+
import os
|
4 |
+
import fnmatch
|
5 |
+
|
6 |
+
import json
|
7 |
+
from huggingface_hub import HfApi
|
8 |
+
|
9 |
+
|
10 |
+
def find_json_files(directory):
    """Recursively collect the paths of all ``*.json`` files under *directory*.

    Args:
        directory: Root directory to walk with ``os.walk``.

    Returns:
        A list of paths, each built by joining the directory in which the
        file was found with the file name. The order follows ``os.walk``
        and is not sorted. A directory that does not exist yields an empty
        list, because ``os.walk`` ignores listing errors by default.
    """
    matches = []
    # Sub-directory names are not needed: os.walk descends into them itself.
    for root, _dirnames, filenames in os.walk(directory):
        matches.extend(
            os.path.join(root, filename)
            for filename in fnmatch.filter(filenames, '*.json')
        )
    return matches
|
16 |
+
|
17 |
+
|
18 |
+
# Root folder holding the evaluation request files to inspect.
directory_path = '/Users/pasquale/workspace/eval/requests'
json_files = find_json_files(directory_path)

# Materialize the model listing once and index it by model id.
# NOTE(review): list_models() is called without filters, so this presumably
# enumerates every model visible to the client and may be slow; confirm.
api = HfApi()
model_lst = list(api.list_models())
id_to_model = {model.id: model for model in model_lst}

for path in json_files:
    with open(path, 'r') as file:
        data = json.load(file)
        # NOTE(review): indentation was lost in the source this was recovered
        # from; the debugger stop is assumed to run once per request file.
        breakpoint()
|
src/backend/run_eval_suite.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from lm_eval import tasks, evaluator, utils
|
2 |
-
from lm_eval.tasks import initialize_tasks
|
3 |
|
4 |
from src.backend.manage_requests import EvalRequest
|
5 |
|
@@ -12,7 +12,9 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
|
|
12 |
if limit:
|
13 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
14 |
|
|
|
15 |
initialize_tasks('INFO')
|
|
|
16 |
task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
|
17 |
|
18 |
print(f"Selected Tasks: {task_names}")
|
|
|
1 |
from lm_eval import tasks, evaluator, utils
|
2 |
+
from lm_eval.tasks import initialize_tasks, include_task_folder
|
3 |
|
4 |
from src.backend.manage_requests import EvalRequest
|
5 |
|
|
|
12 |
if limit:
|
13 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
14 |
|
15 |
+
include_task_folder("src/backend/tasks/")
|
16 |
initialize_tasks('INFO')
|
17 |
+
|
18 |
task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
|
19 |
|
20 |
print(f"Selected Tasks: {task_names}")
|