AppleSwing committed
Commit 5ead910 · 1 parent: c3fc5ce

Add choice for device and verify it in the backend; add debug mode (#18)


- Add app debug mode and dynamic refresh tables (2a18e0ad941b517867200352ba49273da53f5907)
- Merge branch 'main' into pr/15 (a4829c27aeca63dc5327ec3a4287eb66bb2cbde8)
- Add inference_framework to the queue column (86b14ca786017b5479b54e4402226f7597295729)
- Add requirements (f5ff85d527bfa186b8e105d5637ac4e3793a9721)
- Merge branch 'main' into pr/15 (08b56fc73f03f150ef1baa35f78e762dcbf83fd5)
- Merge branch 'pr/15' into pr/18 (b2a2a5bae92f4b80223988e2059a69dfac7caaa8)
- Add GPU types (60d9c33965a34f63d2026b722afa33c03fe48306)
- Delete requests (22ce8a7836b70c1849ec4aeb77be3fce2642bcab)
- add choices for GPU and Solve leaderboard issue (bc48941fdfee36d8d1510a96b2969daa5d1ebf3a)
- fix a bug (6e99f9d4535fd801ae6b675ef2d833cc109e9d74)
- Apply GPU type verification on backend debug mode (dbe8db4df45ec9d75a8ce5abd46b77ff2e7627b7)
- Fix a bug (0fb715c8b89cef41ec9497c09b6ad8db47f65d78)

app.py CHANGED
@@ -2,10 +2,11 @@
 import os
 import datetime
 import socket
+from threading import Thread
 
 import gradio as gr
 import pandas as pd
-
+import time
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from huggingface_hub import snapshot_download
@@ -35,13 +36,27 @@ from src.display.utils import (
     fields,
     WeightType,
     Precision,
+    GPUType
 )
 
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, \
+    QUEUE_REPO, REPO_ID, RESULTS_REPO, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.utils import get_dataset_summary_table
 
+def get_args():
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
+    parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    return parser.parse_args()
+
+args = get_args()
+if args.debug:
+    print("Running in debug mode")
+    QUEUE_REPO = DEBUG_QUEUE_REPO
+    RESULTS_REPO = DEBUG_RESULTS_REPO
 
 def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
     try:
@@ -75,11 +90,6 @@ def init_space():
     )
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
-
-dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
-leaderboard_df = original_df.copy()
-
-
 # Searching and filtering
 def update_table(
     hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str
@@ -142,6 +152,51 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
 
     return filtered_df
 
+shown_columns = None
+dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+leaderboard_df = original_df.copy()
+
+# def update_leaderboard_table():
+#     global leaderboard_df, shown_columns
+#     print("Updating leaderboard table")
+#     return leaderboard_df[
+#         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+#         + shown_columns.value
+#         + [AutoEvalColumn.dummy.name]
+#     ] if not leaderboard_df.empty else leaderboard_df
+
+
+# def update_hidden_leaderboard_table():
+#     global original_df
+#     return original_df[COLS] if original_df.empty is False else original_df
+
+# def update_dataset_table():
+#     global dataset_df
+#     return dataset_df
+
+# def update_finish_table():
+#     global finished_eval_queue_df
+#     return finished_eval_queue_df
+
+# def update_running_table():
+#     global running_eval_queue_df
+#     return running_eval_queue_df
+
+# def update_pending_table():
+#     global pending_eval_queue_df
+#     return pending_eval_queue_df
+
+# def update_finish_num():
+#     global finished_eval_queue_df
+#     return len(finished_eval_queue_df)
+
+# def update_running_num():
+#     global running_eval_queue_df
+#     return len(running_eval_queue_df)
+
+# def update_pending_num():
+#     global pending_eval_queue_df
+#     return len(pending_eval_queue_df)
 
 # triggered only once at startup => read query parameter if it exists
 def load_query(request: gr.Request):
@@ -162,7 +217,7 @@ with demo:
            search_bar = gr.Textbox(
                placeholder=" 🔍 Model search (separate multiple queries with `;`)",
                show_label=False,
-               elem_id="search-bar",
+               elem_id="search-bar"
            )
        with gr.Row():
            shown_columns = gr.CheckboxGroup(
@@ -251,14 +306,14 @@ with demo:
                filter_columns_size,
                search_bar,
            ],
-           leaderboard_table,
+           leaderboard_table
        )
 
        # Check query parameter once at startup and update search bar
        demo.load(load_query, inputs=[], outputs=[search_bar])
 
        for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
-           selector.change(
+           selector.select(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
@@ -323,6 +378,15 @@ with demo:
                    value=None,
                    interactive=True,
                )
+
+               gpu_type = gr.Dropdown(
+                   choices=[t.to_str() for t in GPUType],
+                   label="GPU type",
+                   multiselect=False,
+                   value="NVIDIA-A100-PCIe-80GB",
+                   interactive=True,
+               )
+
 
                with gr.Row():
                    with gr.Column():
@@ -358,6 +422,7 @@ with demo:
 
                    submit_button = gr.Button("Submit Eval")
                    submission_result = gr.Markdown()
+                   debug = gr.Checkbox(value=args.debug, label="Debug", visible=False)
                    submit_button.click(
                        add_new_eval,
                        [
@@ -369,6 +434,8 @@ with demo:
                            weight_type,
                            model_type,
                            inference_framework,
+                           debug,
+                           gpu_type
                        ],
                        submission_result,
                    )
@@ -385,8 +452,7 @@ with demo:
 
 scheduler = BackgroundScheduler()
 
-scheduler.add_job(restart_space, "interval", seconds=6 * 60 * 60)
-
+scheduler.add_job(restart_space, "interval", hours=6)
 
 def launch_backend():
     import subprocess
@@ -395,8 +461,9 @@ def launch_backend():
     if DEVICE not in {"cpu"}:
         _ = subprocess.run(["python", "backend-cli.py"])
 
-
+# Thread(target=periodic_init, daemon=True).start()
 # scheduler.add_job(launch_backend, "interval", seconds=120)
-
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+if __name__ == "__main__":
+    scheduler.start()
+    demo.queue(default_concurrency_limit=40).launch()
+
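
A Python detail worth noting in the `--debug` wiring above: `QUEUE_REPO = DEBUG_QUEUE_REPO` at the top of app.py rebinds only app.py's own copy of the name. Modules that ran their own `from src.envs import QUEUE_REPO` (for example `src.submission.submit`, which therefore takes a separate `debug` argument) keep the original binding. A minimal sketch of an alternative, assuming one is free to resolve debug mode inside `src.envs` itself via an environment variable (the `LEADERBOARD_DEBUG` name is hypothetical):

```python
# Hypothetical variant of src/envs.py: decide the repos once, where they are defined,
# so every `from src.envs import QUEUE_REPO` sees the same value.
import os

DEBUG_MODE = os.environ.get("LEADERBOARD_DEBUG", "0") == "1"  # hypothetical switch

DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"

QUEUE_REPO = DEBUG_QUEUE_REPO if DEBUG_MODE else "sparse-generative-ai/requests"
RESULTS_REPO = DEBUG_RESULTS_REPO if DEBUG_MODE else "sparse-generative-ai/results"
```

With that layout, app.py and backend-cli.py would only need to set the variable before importing `src.envs`, and every consumer of the constants would agree on the target repos.
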
backend-cli.py CHANGED
@@ -16,13 +16,13 @@ from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PAT
 from src.backend.manage_requests import EvalRequest
 from src.leaderboard.read_evals import EvalResult
 
-from src.envs import QUEUE_REPO, RESULTS_REPO, API
+from src.envs import QUEUE_REPO, RESULTS_REPO, API, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.utils import my_snapshot_download, analyze_gpu_stats, parse_nvidia_smi, monitor_gpus
 
 from src.leaderboard.read_evals import get_raw_eval_results
 
 from typing import Optional
-
+import GPUtil
 import time
 
 import pprint
@@ -126,6 +126,9 @@ def request_to_result_name(request: EvalRequest) -> str:
 def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     batch_size = eval_request.batch_size
+
+    if args.debug:
+        RESULTS_REPO = DEBUG_RESULTS_REPO
 
     init_gpu_info = analyze_gpu_stats(parse_nvidia_smi())
     # if init_gpu_info['Mem(M)'] > 500:
@@ -364,9 +367,22 @@ def maybe_refresh_results(thr: int, hard_task_lst: Optional[list[str]] = None) -
     return False
 
 
+def get_gpu_details():
+    gpus = GPUtil.getGPUs()
+    gpu = gpus[0]
+    name = gpu.name.replace(" ", "-")
+    # Convert memory from MB to GB and round to nearest whole number
+    memory_gb = round(gpu.memoryTotal / 1024)
+    memory = f"{memory_gb}GB"
+    formatted_name = f"{name}-{memory}"
+    return formatted_name
+
 def process_pending_requests() -> bool:
+    if args.debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     sanity_checks()
-
+    print("Processing pending requests")
     current_pending_status = [PENDING_STATUS]
 
     # Get all eval request that are PENDING, if you want to run other evals, change this parameter
@@ -385,6 +401,12 @@ def process_pending_requests() -> bool:
 
     eval_request = eval_requests[0]
     pp.pprint(eval_request)
+
+    gpu_type = eval_request.gpu_type
+    curr_gpu_type = get_gpu_details()
+    if gpu_type != curr_gpu_type:
+        print(f"GPU type mismatch: {gpu_type} vs {curr_gpu_type}")
+        return False
 
     my_snapshot_download(
         repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60
@@ -426,6 +448,8 @@ def get_args():
     parser.add_argument("--precision", type=str, default="float32,float16,8bit,4bit", help="Precision to debug")
     parser.add_argument("--inference-framework", type=str, default="hf-chat", help="Inference framework to debug")
     parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
+    parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
+                        help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
     return parser.parse_args()
 
 
@@ -454,8 +478,13 @@ if __name__ == "__main__":
                status="",
                json_filepath="",
                precision=precision,  # Use precision from arguments
-               inference_framework=args.inference_framework  # Use inference framework from arguments
+               inference_framework=args.inference_framework,  # Use inference framework from arguments
+               gpu_type=args.gpu_type
            )
+           curr_gpu_type = get_gpu_details()
+           if eval_request.gpu_type != curr_gpu_type:
+               print(f"GPU type mismatch: {eval_request.gpu_type} vs {curr_gpu_type}")
+               raise Exception("GPU type mismatch")
            results = process_evaluation(task, eval_request, limit=args.limit)
        except Exception as e:
            print(f"debug running error: {e}")
requirements.txt CHANGED
@@ -30,3 +30,4 @@ evaluate
 spacy
 selfcheckgpt
 immutabledict
+gputil

src/backend/manage_requests.py CHANGED
@@ -28,6 +28,7 @@ class EvalRequest:
     params: Optional[int] = None
     license: Optional[str] = ""
     batch_size: Optional[int] = 1
+    gpu_type: Optional[str] = "NVIDIA-A100-PCIe-80GB"
 
     def get_model_args(self) -> str:
         model_args = f"pretrained={self.model},revision={self.revision},parallelize=True"  # ,max_length=4096"
src/display/utils.py CHANGED
@@ -140,6 +140,7 @@ class EvalQueueColumn: # Queue column
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
     weight_type = ColumnContent("weight_type", "str", "Original")
+    model_framework = ColumnContent("inference_framework", "str", True)
     status = ColumnContent("status", "str", True)
 
 
@@ -189,7 +190,25 @@ class InferenceFramework(Enum):
         return InferenceFramework.HF_Chat
     return InferenceFramework.Unknown
 
+class GPUType(Enum):
+    H100_pcie = ModelDetails("NVIDIA-H100-PCIe-80GB")
+    A100_pcie = ModelDetails("NVIDIA-A100-PCIe-80GB")
+    A5000 = ModelDetails("NVIDIA-RTX-A5000-24GB")
+    Unknown = ModelDetails("?")
 
+    def to_str(self):
+        return self.value.name
+
+    @staticmethod
+    def from_str(gpu_type: str):
+        if gpu_type in ["NVIDIA-H100-PCIe-80GB"]:
+            return GPUType.H100_pcie
+        if gpu_type in ["NVIDIA-A100-PCIe-80GB"]:
+            return GPUType.A100_pcie
+        if gpu_type in ["NVIDIA-RTX-A5000-24GB"]:
+            return GPUType.A5000
+        return GPUType.Unknown
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
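
Since the dropdown in app.py is populated from `to_str()` and the backend compares raw strings, `from_str()` should round-trip every member; scanning the members instead of repeating the string literals keeps the two from drifting apart. A minimal sketch of that check, with `ModelDetails` reduced to the single field used here (the real class lives in src/display/utils.py):

```python
# Round-trip sketch for GPUType: every dropdown string should map back to the
# same enum member. ModelDetails is a stand-in with only the field needed here.
from dataclasses import dataclass
from enum import Enum


@dataclass
class ModelDetails:
    name: str


class GPUType(Enum):
    H100_pcie = ModelDetails("NVIDIA-H100-PCIe-80GB")
    A100_pcie = ModelDetails("NVIDIA-A100-PCIe-80GB")
    A5000 = ModelDetails("NVIDIA-RTX-A5000-24GB")
    Unknown = ModelDetails("?")

    def to_str(self) -> str:
        return self.value.name

    @staticmethod
    def from_str(gpu_type: str) -> "GPUType":
        # Scan the members so the mapping cannot drift from the ModelDetails strings.
        for member in GPUType:
            if member is not GPUType.Unknown and member.to_str() == gpu_type:
                return member
        return GPUType.Unknown


for member in GPUType:
    assert GPUType.from_str(member.to_str()) is member or member is GPUType.Unknown
```
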
src/envs.py CHANGED
@@ -12,8 +12,8 @@ QUEUE_REPO = "sparse-generative-ai/requests"
 QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
 RESULTS_REPO = "sparse-generative-ai/results"
 
-PRIVATE_QUEUE_REPO = "sparse-generative-ai/private-requests"
-PRIVATE_RESULTS_REPO = "sparse-generative-ai/private-results"
+DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
+DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"
 
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
 
src/populate.py CHANGED
@@ -95,6 +95,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+           data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
 
            all_evals.append(data)
        elif ".md" not in entry:
@@ -107,6 +108,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+               data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
                all_evals.append(data)
 
    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
src/submission/check_validity.py CHANGED
@@ -130,7 +130,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                    continue
                with open(os.path.join(root, file), "r") as f:
                    info = json.load(f)
-                   file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}")
+                   file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}_{info['gpu_type']}")
 
                    # Select organisation
                    if info["model"].count("/") == 0 or "submitted_time" not in info:
src/submission/submit.py CHANGED
@@ -3,7 +3,7 @@ import os
 from datetime import datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, DEBUG_QUEUE_REPO
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
     already_submitted_models,
@@ -26,12 +26,17 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
     inference_framework: str,
+    debug: bool = False,
+    gpu_type: str = "NVIDIA-A100-PCIe-80GB",
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
 
+    if debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     user_name = ""
     model_path = model
     if "/" in model:
@@ -110,17 +115,18 @@
         "params": model_size,
         "license": license,
         "inference_framework": inference_framework,
+        "gpu_type": gpu_type
     }
 
     # Check for duplicate submission
-    if f"{model}_{revision}_{precision}_{inference_framework}" in REQUESTED_MODELS:
+    if f"{model}_{revision}_{precision}_{inference_framework}_{gpu_type}" in REQUESTED_MODELS:
         return styled_warning("This model has been already submitted.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
     # out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}.json"
+    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}_{gpu_type}.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
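
The conditional `QUEUE_REPO = DEBUG_QUEUE_REPO` inside `add_new_eval` makes `QUEUE_REPO` a function-local name; if the upload step later in the function (outside the hunks shown) reads `QUEUE_REPO`, the non-debug path would hit `UnboundLocalError` because the local is never assigned. A sketch that selects the target repo into its own local instead; `upload_request` is a hypothetical helper and the upload call is shown only for illustration:

```python
# Sketch: choose the destination repo explicitly rather than conditionally
# shadowing the imported constant. upload_request is hypothetical.
from src.envs import API, DEBUG_QUEUE_REPO, QUEUE_REPO


def upload_request(out_path: str, path_in_repo: str, debug: bool = False) -> None:
    target_repo = DEBUG_QUEUE_REPO if debug else QUEUE_REPO
    API.upload_file(
        path_or_fileobj=out_path,
        path_in_repo=path_in_repo,
        repo_id=target_repo,
        repo_type="dataset",
        commit_message=f"Add request {path_in_repo}",
    )
```
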