dvilasuero HF Staff commited on
Commit
005831e
·
verified ·
1 Parent(s): 0355669

Upload runner.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. runner.py +145 -0
runner.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = [
5
+ # "inspect-ai @ git+https://github.com/dvsrepo/inspect_ai.git@fallback-to-modified-for-hf-fs",
6
+ # "datasets",
7
+ # "openai",
8
+ # "transformers",
9
+ # "accelerate",
10
+ # "huggingface_hub",
11
+ # "inspect-evals",
12
+ # "pandas",
13
+ # "pyarrow",
14
+ # ]
15
+ # ///
16
+
17
+ import os
18
+ import sys
19
+ import subprocess
20
+ import tempfile
21
+ import urllib.request
22
+ from pathlib import Path
23
+
24
+ from inspect_ai.analysis import evals_df, samples_df
25
+
26
+
27
def export_logs_to_parquet(log_dir: str, dataset_repo: str) -> None:
    """Export inspect-ai eval logs as parquet files and upload them to a HF dataset repo.

    Reads the eval-level and sample-level dataframes from ``log_dir``, writes
    each to a parquet file in a temporary directory, and uploads both files to
    the Hub dataset repository.

    Args:
        log_dir: Log location (local path or ``hf://`` URI) readable by
            ``evals_df`` / ``samples_df``.
        dataset_repo: Target dataset repo id, optionally prefixed with
            ``datasets/``.

    Raises:
        ValueError: If the ``HF_TOKEN`` environment variable is not set.
    """
    from huggingface_hub import HfApi

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN environment variable not set")

    api = HfApi(token=hf_token)

    # upload_file expects a bare repo id; strip the "datasets/" prefix if present
    # (removeprefix is a no-op when the prefix is absent — same as the original
    # startswith/replace conditional; script metadata requires Python >= 3.10).
    repo_id = dataset_repo.removeprefix("datasets/")

    evals = evals_df(logs=log_dir)
    samples = samples_df(logs=log_dir)

    with tempfile.TemporaryDirectory() as tmpdir:
        # One loop replaces the two near-identical write/upload call sites.
        for filename, frame in (("evals.parquet", evals), ("samples.parquet", samples)):
            local_path = Path(tmpdir) / filename
            frame.to_parquet(local_path, index=False, engine="pyarrow")
            api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=filename,
                repo_id=repo_id,
                repo_type="dataset",
                token=hf_token,
            )
67
+
68
+
69
if __name__ == "__main__":
    # CLI: runner for inspect-ai evaluations that logs to a HF dataset repo and
    # exports the logs to parquet afterwards.
    if len(sys.argv) < 4:
        print("Usage: eval_runner.py <eval_ref> <model> <dataset_repo> [--inspect-evals] [extra_args...]")
        sys.exit(1)

    eval_ref = sys.argv[1]
    model = sys.argv[2]
    dataset_repo = sys.argv[3]

    # --inspect-evals means eval_ref names a task from the inspect-evals
    # registry; otherwise eval_ref is a URL to a standalone eval script.
    is_inspect_evals = "--inspect-evals" in sys.argv
    extra_args = [arg for arg in sys.argv[4:] if arg != "--inspect-evals"]

    # Logs are written directly to the Hub dataset repo under /logs.
    if not dataset_repo.startswith("datasets/"):
        dataset_repo = f"datasets/{dataset_repo}"
    log_dir = f"hf://{dataset_repo}/logs"

    # A comma-separated model list selects `inspect eval-set` instead of
    # `inspect eval`. (The original computed this twice; once is enough.)
    is_eval_set = "," in model

    if is_inspect_evals:
        eval_target = eval_ref
        cleanup_file = None
    else:
        print("Downloading eval script...")
        with urllib.request.urlopen(eval_ref) as response:
            eval_code = response.read().decode("utf-8")

        eval_filename = "downloaded_eval.py"
        with open(eval_filename, "w") as f:
            f.write(eval_code)

        eval_target = eval_filename
        cleanup_file = eval_filename

    try:
        # The eval and eval-set invocations differ only in the subcommand, so
        # build one command list instead of two duplicated ones. The printed
        # messages are byte-identical to the original branches.
        if is_eval_set:
            print("Running evaluation set...")
            subcommand = "eval-set"
        else:
            print("Running evaluation...")
            subcommand = "eval"

        cmd = [
            "inspect",
            subcommand,
            eval_target,
            "--model",
            model,
            "--log-dir",
            log_dir,
            "--log-shared",
            "--log-buffer",
            "100",
        ]
        cmd.extend(extra_args)

        subprocess.run(cmd, check=True)

        print("Exporting logs to parquet...")
        try:
            export_logs_to_parquet(log_dir, dataset_repo)
        except Exception as e:
            # Best-effort: a failed parquet export should not fail the eval run.
            print(f"Warning: Could not export to parquet: {e}")

    finally:
        # Remove the downloaded eval script, if we created one.
        if cleanup_file and os.path.exists(cleanup_file):
            os.unlink(cleanup_file)