future-xy committed
Commit d936aea
1 Parent(s): 88d1c0e

improve local debug

Files changed (2)
  1. backend-cli.py +19 -11
  2. src/backend/envs.py +0 -2
backend-cli.py CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
 from src.backend.run_eval_suite import run_evaluation
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
 from src.backend.sort_queue import sort_models_by_priority
-from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, LIMIT, Task
+from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, Task
 from src.backend.manage_requests import EvalRequest
 from src.leaderboard.read_evals import EvalResult
 
@@ -122,7 +122,7 @@ def request_to_result_name(request: EvalRequest) -> str:
     return res
 
 
-def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
+def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     try:
         results = run_evaluation(
@@ -132,7 +132,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
             batch_size=batch_size,
             device=DEVICE,
             use_cache=None,
-            limit=LIMIT,
+            limit=limit,
         )
     except RuntimeError as e:
         if "No executable batch size found" in str(e):
@@ -144,7 +144,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
                 batch_size=batch_size,
                 device=DEVICE,
                 use_cache=None,
-                limit=LIMIT,
+                limit=limit,
             )
         else:
             raise
@@ -395,6 +395,12 @@ def process_pending_requests() -> bool:
 def get_args():
     parser = argparse.ArgumentParser(description="Run the backend")
     parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    # debug parameters
+    parser.add_argument("--task", type=str, default="selfcheckgpt", help="Task to debug")
+    parser.add_argument("--model", type=str, default="facebook/opt-1.3b", help="Model to debug")
+    parser.add_argument("--precision", type=str, default="float16", help="Precision to debug")
+    parser.add_argument("--inference-framework", type=str, default="hf-chat", help="Inference framework to debug")
+    parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
     return parser.parse_args()
 
 
@@ -403,11 +409,8 @@ if __name__ == "__main__":
     local_debug = args.debug
     # debug specific task by ping
     if local_debug:
-        # debug_model_names = ["mistralai/Mixtral-8x7B-Instruct-v0.1"]
-        debug_model_names = ["facebook/opt-1.3b"]
-        # debug_model_names = ["TheBloke/Mixtral-8x7B-v0.1-GPTQ"]
-        debug_task_name = 'selfcheckgpt'
-        # debug_task_name = "mmlu"
+        debug_model_names = [args.model]  # Use model from arguments
+        debug_task_name = args.task  # Use task from arguments
         task_lst = TASKS_HARNESS.copy()
         for task in task_lst:
             for debug_model_name in debug_model_names:
@@ -415,9 +418,14 @@ if __name__ == "__main__":
                 if task_name != debug_task_name:
                     continue
                 eval_request = EvalRequest(
-                    model=debug_model_name, private=False, status="", json_filepath="", precision="float16", inference_framework="hf-chat"
+                    model=debug_model_name,
+                    private=False,
+                    status="",
+                    json_filepath="",
+                    precision=args.precision,  # Use precision from arguments
+                    inference_framework=args.inference_framework  # Use inference framework from arguments
                 )
-                results = process_evaluation(task, eval_request)
+                results = process_evaluation(task, eval_request, limit=args.limit)
     else:
         while True:
            res = False
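
With these flags in place, the debug model, task, precision, inference framework, and sample cap are all chosen per run instead of being hard-coded; the cap that used to live in the module-level LIMIT constant (see the envs.py change below) is now passed through --limit, which defaults to None and so keeps the previous "no limit" behaviour when omitted. A minimal invocation sketch, assuming the script is launched from the repository root; the values shown are just the argparse defaults plus an illustrative --limit of 10:

python backend-cli.py --debug --task selfcheckgpt --model facebook/opt-1.3b --precision float16 --inference-framework hf-chat --limit 10
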
src/backend/envs.py CHANGED
@@ -64,5 +64,3 @@ EVAL_REQUESTS_PATH_BACKEND_SYNC = os.path.join(CACHE_PATH, "eval-queue-bk-sync")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
-LIMIT = None  # Testing; needs to be None