meg-huggingface commited on
Commit
8cd9975
·
1 Parent(s): e6dead6

changing batch size to auto

Browse files
main_backend_harness.py CHANGED
@@ -70,7 +70,7 @@ def run_auto_eval():
70
  num_fewshot=NUM_FEWSHOT,
71
  local_dir=EVAL_RESULTS_PATH_BACKEND,
72
  results_repo=RESULTS_REPO,
73
- batch_size=1,
74
  device=DEVICE,
75
  no_cache=True,
76
  limit=LIMIT
 
70
  num_fewshot=NUM_FEWSHOT,
71
  local_dir=EVAL_RESULTS_PATH_BACKEND,
72
  results_repo=RESULTS_REPO,
73
+ batch_size='auto',
74
  device=DEVICE,
75
  no_cache=True,
76
  limit=LIMIT
src/backend/manage_requests.py CHANGED
@@ -2,6 +2,7 @@ import glob
2
  import json
3
  from dataclasses import dataclass
4
  from typing import Optional
 
5
 
6
  from huggingface_hub import HfApi, snapshot_download
7
  from src.envs import TOKEN
@@ -87,6 +88,7 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[Ev
87
  for json_filepath in json_files:
88
  with open(json_filepath) as fp:
89
  data = json.load(fp)
 
90
  if data["status"] in job_status:
91
  data["json_filepath"] = json_filepath
92
  print(data.items())
@@ -96,6 +98,30 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[Ev
96
  return eval_requests
97
 
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def check_completed_evals(
100
  api: HfApi,
101
  hf_repo: str,
@@ -126,7 +152,9 @@ def check_completed_evals(
126
  )
127
  set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
128
  else:
129
- logger.info(
130
- f"No result file found for {model} setting it to {failed_status}"
131
- )
132
- set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)
 
 
 
2
  import json
3
  from dataclasses import dataclass
4
  from typing import Optional
5
+ from datetime import datetime, timezone
6
 
7
  from huggingface_hub import HfApi, snapshot_download
8
  from src.envs import TOKEN
 
88
  for json_filepath in json_files:
89
  with open(json_filepath) as fp:
90
  data = json.load(fp)
91
+ # TODO: isn't job_status the string "RUNNING"?
92
  if data["status"] in job_status:
93
  data["json_filepath"] = json_filepath
94
  print(data.items())
 
98
  return eval_requests
99
 
100
 
101
def check_set_to_fail(eval_request: "EvalRequest") -> bool:
    """Decide whether a stalled eval request should be marked FAILED.

    Reads the request's JSON file and checks how long it has been sitting
    in the PENDING/RUNNING state.

    Args:
        eval_request: Request whose ``json_filepath`` points at the status
            JSON (must contain ``status`` and ``submitted_time`` keys).

    Returns:
        False if the request is PENDING/RUNNING and was submitted less than
        2 hours ago (leave it alone); True otherwise (safe to fail it).
    """
    json_filepath = eval_request.json_filepath

    with open(json_filepath) as fp:
        data = json.load(fp)

    status = data["status"]
    if status == "PENDING" or status == "RUNNING":
        time_format = "%Y-%m-%dT%H:%M:%SZ"
        submitted_time_str = data["submitted_time"]
        # strptime yields a naive datetime; the "Z" suffix in the format
        # means the stored timestamp is UTC, so tag it explicitly.
        submitted_time = datetime.strptime(submitted_time_str, time_format).replace(
            tzinfo=timezone.utc
        )
        # BUG FIX: the original converted current_time to a *string* via
        # strftime, then accessed `.tzinfo` on it (AttributeError) and
        # subtracted datetimes from it (TypeError). Keep it as an aware
        # datetime so the subtraction below is valid.
        current_time = datetime.now(timezone.utc)
        diff_seconds = (current_time - submitted_time).total_seconds()
        # If it's been running for less than 2 hours, leave it alone.
        return diff_seconds >= 7200
    # Any non-active status: treat as eligible to be set to FAILED.
    return True

125
  def check_completed_evals(
126
  api: HfApi,
127
  hf_repo: str,
 
152
  )
153
  set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
154
  else:
155
+ set_to_fail = check_set_to_fail(eval_request)
156
+ if set_to_fail:
157
+ logger.info(
158
+ f"No result file found for {model} setting it to {failed_status}"
159
+ )
160
+ set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)