add choice for device, and verify in backend. Add debug mode (#18)
- Add app debug mode and dynamic refresh tables (2a18e0ad941b517867200352ba49273da53f5907)
- Merge branch 'main' into pr/15 (a4829c27aeca63dc5327ec3a4287eb66bb2cbde8)
- Add inference_framework to the queue column (86b14ca786017b5479b54e4402226f7597295729)
- Add requirements (f5ff85d527bfa186b8e105d5637ac4e3793a9721)
- Merge branch 'main' into pr/15 (08b56fc73f03f150ef1baa35f78e762dcbf83fd5)
- Merge branch 'pr/15' into pr/18 (b2a2a5bae92f4b80223988e2059a69dfac7caaa8)
- Add GPU types (60d9c33965a34f63d2026b722afa33c03fe48306)
- Delete requests (22ce8a7836b70c1849ec4aeb77be3fce2642bcab)
- add choices for GPU and Solve leaderboard issue (bc48941fdfee36d8d1510a96b2969daa5d1ebf3a)
- fix a bug (6e99f9d4535fd801ae6b675ef2d833cc109e9d74)
- Apply GPU type verification on backend debug mode (dbe8db4df45ec9d75a8ce5abd46b77ff2e7627b7)
- Fix a bug (0fb715c8b89cef41ec9497c09b6ad8db47f65d78)
- app.py +83 -16
- backend-cli.py +33 -4
- requirements.txt +1 -0
- src/backend/manage_requests.py +1 -0
- src/display/utils.py +19 -0
- src/envs.py +2 -2
- src/populate.py +2 -0
- src/submission/check_validity.py +1 -1
- src/submission/submit.py +9 -3

app.py

```diff
@@ -2,10 +2,11 @@
 import os
 import datetime
 import socket
+from threading import Thread
 
 import gradio as gr
 import pandas as pd
-
+import time
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from huggingface_hub import snapshot_download
@@ -35,13 +36,27 @@ from src.display.utils import (
     fields,
     WeightType,
     Precision,
+    GPUType
 )
 
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC,
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, \
+    QUEUE_REPO, REPO_ID, RESULTS_REPO, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.utils import get_dataset_summary_table
 
+def get_args():
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
+    parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    return parser.parse_args()
+
+args = get_args()
+if args.debug:
+    print("Running in debug mode")
+    QUEUE_REPO = DEBUG_QUEUE_REPO
+    RESULTS_REPO = DEBUG_RESULTS_REPO
 
 def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
     try:
@@ -75,11 +90,6 @@ def init_space():
     )
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
-
-dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
-leaderboard_df = original_df.copy()
-
-
 # Searching and filtering
 def update_table(
     hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str
@@ -142,6 +152,51 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
 
     return filtered_df
 
+shown_columns = None
+dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+leaderboard_df = original_df.copy()
+
+# def update_leaderboard_table():
+#     global leaderboard_df, shown_columns
+#     print("Updating leaderboard table")
+#     return leaderboard_df[
+#         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+#         + shown_columns.value
+#         + [AutoEvalColumn.dummy.name]
+#     ] if not leaderboard_df.empty else leaderboard_df
+
+
+# def update_hidden_leaderboard_table():
+#     global original_df
+#     return original_df[COLS] if original_df.empty is False else original_df
+
+# def update_dataset_table():
+#     global dataset_df
+#     return dataset_df
+
+# def update_finish_table():
+#     global finished_eval_queue_df
+#     return finished_eval_queue_df
+
+# def update_running_table():
+#     global running_eval_queue_df
+#     return running_eval_queue_df
+
+# def update_pending_table():
+#     global pending_eval_queue_df
+#     return pending_eval_queue_df
+
+# def update_finish_num():
+#     global finished_eval_queue_df
+#     return len(finished_eval_queue_df)
+
+# def update_running_num():
+#     global running_eval_queue_df
+#     return len(running_eval_queue_df)
+
+# def update_pending_num():
+#     global pending_eval_queue_df
+#     return len(pending_eval_queue_df)
 
 # triggered only once at startup => read query parameter if it exists
 def load_query(request: gr.Request):
@@ -162,7 +217,7 @@ with demo:
         search_bar = gr.Textbox(
             placeholder=" 🔍 Model search (separate multiple queries with `;`)",
             show_label=False,
-            elem_id="search-bar"
+            elem_id="search-bar"
         )
         with gr.Row():
             shown_columns = gr.CheckboxGroup(
@@ -251,14 +306,14 @@ with demo:
             filter_columns_size,
             search_bar,
         ],
-        leaderboard_table
+        leaderboard_table
     )
 
     # Check query parameter once at startup and update search bar
     demo.load(load_query, inputs=[], outputs=[search_bar])
 
     for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
-        selector.
+        selector.select(
             update_table,
             [
                 hidden_leaderboard_table_for_search,
@@ -323,6 +378,15 @@ with demo:
                 value=None,
                 interactive=True,
             )
+
+            gpu_type = gr.Dropdown(
+                choices=[t.to_str() for t in GPUType],
+                label="GPU type",
+                multiselect=False,
+                value="NVIDIA-A100-PCIe-80GB",
+                interactive=True,
+            )
+
 
             with gr.Row():
                 with gr.Column():
@@ -358,6 +422,7 @@ with demo:
 
             submit_button = gr.Button("Submit Eval")
             submission_result = gr.Markdown()
+            debug = gr.Checkbox(value=args.debug, label="Debug", visible=False)
             submit_button.click(
                 add_new_eval,
                 [
@@ -369,6 +434,8 @@ with demo:
                     weight_type,
                     model_type,
                     inference_framework,
+                    debug,
+                    gpu_type
                 ],
                 submission_result,
             )
@@ -385,8 +452,7 @@
 
 scheduler = BackgroundScheduler()
 
-scheduler.add_job(restart_space, "interval",
-
+scheduler.add_job(restart_space, "interval", hours=6)
 
 def launch_backend():
     import subprocess
@@ -395,8 +461,9 @@ def launch_backend():
     if DEVICE not in {"cpu"}:
         _ = subprocess.run(["python", "backend-cli.py"])
 
-
+# Thread(target=periodic_init, daemon=True).start()
 # scheduler.add_job(launch_backend, "interval", seconds=120)
-
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+if __name__ == "__main__":
+    scheduler.start()
+    demo.queue(default_concurrency_limit=40).launch()
+
```
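The debug switch added at the top of app.py only repoints the queue and results repos at the debug datasets before the Gradio UI is built. A minimal, self-contained sketch of that flow, using the DEBUG_* repo names introduced in src/envs.py below (not the Space's full code):

```python
# Minimal sketch of app.py's new debug switch, runnable on its own.
import argparse

QUEUE_REPO = "sparse-generative-ai/requests"
RESULTS_REPO = "sparse-generative-ai/results"
DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"

parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
parser.add_argument("--debug", action="store_true", help="Run in debug mode")
args = parser.parse_args()

if args.debug:
    # Same redirection app.py performs before building the UI.
    QUEUE_REPO = DEBUG_QUEUE_REPO
    RESULTS_REPO = DEBUG_RESULTS_REPO

print(QUEUE_REPO, RESULTS_REPO)  # run with and without --debug to compare
```

The same flag is also forwarded to add_new_eval through the hidden `debug` checkbox, so submissions made from a debug instance land in the debug queue.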
backend-cli.py

```diff
@@ -16,13 +16,13 @@ from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PAT
 from src.backend.manage_requests import EvalRequest
 from src.leaderboard.read_evals import EvalResult
 
-from src.envs import QUEUE_REPO, RESULTS_REPO, API
+from src.envs import QUEUE_REPO, RESULTS_REPO, API, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.utils import my_snapshot_download, analyze_gpu_stats, parse_nvidia_smi, monitor_gpus
 
 from src.leaderboard.read_evals import get_raw_eval_results
 
 from typing import Optional
-
+import GPUtil
 import time
 
 import pprint
@@ -126,6 +126,9 @@ def request_to_result_name(request: EvalRequest) -> str:
 def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     batch_size = eval_request.batch_size
+
+    if args.debug:
+        RESULTS_REPO = DEBUG_RESULTS_REPO
 
     init_gpu_info = analyze_gpu_stats(parse_nvidia_smi())
     # if init_gpu_info['Mem(M)'] > 500:
@@ -364,9 +367,22 @@ def maybe_refresh_results(thr: int, hard_task_lst: Optional[list[str]] = None) -
     return False
 
 
+def get_gpu_details():
+    gpus = GPUtil.getGPUs()
+    gpu = gpus[0]
+    name = gpu.name.replace(" ", "-")
+    # Convert memory from MB to GB and round to nearest whole number
+    memory_gb = round(gpu.memoryTotal / 1024)
+    memory = f"{memory_gb}GB"
+    formatted_name = f"{name}-{memory}"
+    return formatted_name
+
 def process_pending_requests() -> bool:
+    if args.debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     sanity_checks()
-
+    print("Processing pending requests")
     current_pending_status = [PENDING_STATUS]
 
     # Get all eval request that are PENDING, if you want to run other evals, change this parameter
@@ -385,6 +401,12 @@ def process_pending_requests() -> bool:
 
     eval_request = eval_requests[0]
     pp.pprint(eval_request)
+
+    gpu_type = eval_request.gpu_type
+    curr_gpu_type = get_gpu_details()
+    if gpu_type != curr_gpu_type:
+        print(f"GPU type mismatch: {gpu_type} vs {curr_gpu_type}")
+        return False
 
     my_snapshot_download(
         repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60
@@ -426,6 +448,8 @@ def get_args():
     parser.add_argument("--precision", type=str, default="float32,float16,8bit,4bit", help="Precision to debug")
     parser.add_argument("--inference-framework", type=str, default="hf-chat", help="Inference framework to debug")
     parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
+    parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
+                        help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
     return parser.parse_args()
 
 
@@ -454,8 +478,13 @@ if __name__ == "__main__":
                 status="",
                 json_filepath="",
                 precision=precision,  # Use precision from arguments
-                inference_framework=args.inference_framework  # Use inference framework from arguments
+                inference_framework=args.inference_framework,  # Use inference framework from arguments
+                gpu_type=args.gpu_type
             )
+            curr_gpu_type = get_gpu_details()
+            if eval_request.gpu_type != curr_gpu_type:
+                print(f"GPU type mismatch: {eval_request.gpu_type} vs {curr_gpu_type}")
+                raise Exception("GPU type mismatch")
             results = process_evaluation(task, eval_request, limit=args.limit)
         except Exception as e:
             print(f"debug running error: {e}")
```
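The backend now refuses to run a request whose gpu_type does not match the card it is actually running on. A rough sketch of that check, with GPUtil.getGPUs() replaced by a stub so the snippet runs anywhere; the exact name string the real library reports depends on the driver, so the stub value is only illustrative:

```python
# Sketch of the backend-cli.py GPU check; GPUtil is replaced by a stub here.
from types import SimpleNamespace

def fake_get_gpus():
    # Stand-in for GPUtil.getGPUs(); memoryTotal is reported in MB.
    return [SimpleNamespace(name="NVIDIA A100-PCIe", memoryTotal=81920)]

def get_gpu_details() -> str:
    gpu = fake_get_gpus()[0]
    name = gpu.name.replace(" ", "-")
    memory_gb = round(gpu.memoryTotal / 1024)  # MB -> GB, rounded
    return f"{name}-{memory_gb}GB"

requested = "NVIDIA-A100-PCIe-80GB"   # eval_request.gpu_type
current = get_gpu_details()           # e.g. "NVIDIA-A100-PCIe-80GB"
if requested != current:
    print(f"GPU type mismatch: {requested} vs {current}")  # request is skipped
else:
    print("GPU type matches; request can be processed")
```

In process_pending_requests a mismatch simply returns False, leaving the request pending for a worker with the right card, while the --debug path raises instead.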
requirements.txt

```diff
@@ -30,3 +30,4 @@ evaluate
 spacy
 selfcheckgpt
 immutabledict
+gputil
```
src/backend/manage_requests.py

```diff
@@ -28,6 +28,7 @@ class EvalRequest:
     params: Optional[int] = None
     license: Optional[str] = ""
     batch_size: Optional[int] = 1
+    gpu_type: Optional[str] = "NVIDIA-A100-PCIe-80GB"
 
     def get_model_args(self) -> str:
         model_args = f"pretrained={self.model},revision={self.revision},parallelize=True"  # ,max_length=4096"
```
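Because the new field carries a default, request objects built from older JSON files (which have no gpu_type key) quietly fall back to the A100 string. A trimmed-down sketch, keeping only the fields relevant here:

```python
# Trimmed-down sketch of EvalRequest; the real class has many more fields.
from dataclasses import dataclass
from typing import Optional

@dataclass
class EvalRequest:
    model: str
    precision: str
    batch_size: Optional[int] = 1
    gpu_type: Optional[str] = "NVIDIA-A100-PCIe-80GB"

req = EvalRequest(model="org/model", precision="float16")
print(req.gpu_type)  # "NVIDIA-A100-PCIe-80GB"
```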
src/display/utils.py

```diff
@@ -140,6 +140,7 @@ class EvalQueueColumn: # Queue column
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
     weight_type = ColumnContent("weight_type", "str", "Original")
+    model_framework = ColumnContent("inference_framework", "str", True)
     status = ColumnContent("status", "str", True)
 
 
@@ -189,7 +190,25 @@ class InferenceFramework(Enum):
             return InferenceFramework.HF_Chat
         return InferenceFramework.Unknown
 
+class GPUType(Enum):
+    H100_pcie = ModelDetails("NVIDIA-H100-PCIe-80GB")
+    A100_pcie = ModelDetails("NVIDIA-A100-PCIe-80GB")
+    A5000 = ModelDetails("NVIDIA-RTX-A5000-24GB")
+    Unknown = ModelDetails("?")
 
+    def to_str(self):
+        return self.value.name
+
+    @staticmethod
+    def from_str(gpu_type: str):
+        if gpu_type in ["NVIDIA-H100-PCIe-80GB"]:
+            return GPUType.A100_pcie
+        if gpu_type in ["NVIDIA-A100-PCIe-80GB"]:
+            return GPUType.H100_pcie
+        if gpu_type in ["NVIDIA-A5000-24GB"]:
+            return GPUType.A5000
+        return GPUType.Unknown
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
```
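The strings exposed by to_str are exactly the choices offered by the new dropdown in app.py. Note that, as committed, from_str maps the H100 string to A100_pcie and the A100 string to H100_pcie, and it checks "NVIDIA-A5000-24GB" rather than the enum's "NVIDIA-RTX-A5000-24GB"; the sketch below instead assumes a one-to-one mapping and a simplified ModelDetails with only a name field:

```python
# Sketch only: a one-to-one string <-> member mapping for GPUType.
# ModelDetails is simplified here to a single `name` field.
from dataclasses import dataclass
from enum import Enum

@dataclass
class ModelDetails:
    name: str

class GPUType(Enum):
    H100_pcie = ModelDetails("NVIDIA-H100-PCIe-80GB")
    A100_pcie = ModelDetails("NVIDIA-A100-PCIe-80GB")
    A5000 = ModelDetails("NVIDIA-RTX-A5000-24GB")
    Unknown = ModelDetails("?")

    def to_str(self) -> str:
        return self.value.name

    @staticmethod
    def from_str(gpu_type: str) -> "GPUType":
        for member in GPUType:
            if member.value.name == gpu_type:
                return member
        return GPUType.Unknown

print([t.to_str() for t in GPUType])              # the app.py dropdown choices
print(GPUType.from_str("NVIDIA-A100-PCIe-80GB"))  # GPUType.A100_pcie
```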
src/envs.py

```diff
@@ -12,8 +12,8 @@ QUEUE_REPO = "sparse-generative-ai/requests"
 QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
 RESULTS_REPO = "sparse-generative-ai/results"
 
-
-
+DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
+DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"
 
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
 
```
src/populate.py

```diff
@@ -95,6 +95,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+            data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
 
             all_evals.append(data)
         elif ".md" not in entry:
@@ -107,6 +108,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+                data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
                 all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
```
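The queue tables read the new column defensively: request files written before this change carry no inference_framework key, so the loader falls back to "-". A tiny illustration of that fallback:

```python
# Older queue entries may not carry the new key; .get() supplies a placeholder.
old_entry = {"model": "org/model", "revision": "main"}
new_entry = {"model": "org/model", "revision": "main", "inference_framework": "hf-chat"}

print(old_entry.get("inference_framework", "-"))  # "-"
print(new_entry.get("inference_framework", "-"))  # "hf-chat"
```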
src/submission/check_validity.py

```diff
@@ -130,7 +130,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                 continue
             with open(os.path.join(root, file), "r") as f:
                 info = json.load(f)
-                file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}")
+                file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}_{info['gpu_type']}")
 
                 # Select organisation
                 if info["model"].count("/") == 0 or "submitted_time" not in info:
```
src/submission/submit.py

```diff
@@ -3,7 +3,7 @@ import os
 from datetime import datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, DEBUG_QUEUE_REPO
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
     already_submitted_models,
@@ -26,12 +26,17 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
     inference_framework: str,
+    debug: bool = False,
+    gpu_type: str = "NVIDIA-A100-PCIe-80GB",
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
 
+    if debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     user_name = ""
     model_path = model
     if "/" in model:
@@ -110,17 +115,18 @@ def add_new_eval(
         "params": model_size,
         "license": license,
         "inference_framework": inference_framework,
+        "gpu_type": gpu_type
     }
 
     # Check for duplicate submission
-    if f"{model}_{revision}_{precision}_{inference_framework}" in REQUESTED_MODELS:
+    if f"{model}_{revision}_{precision}_{inference_framework}_{gpu_type}" in REQUESTED_MODELS:
         return styled_warning("This model has been already submitted.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
     # out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}.json"
+    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}_{gpu_type}.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
```
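With the extra submission fields, both the duplicate-submission key and the request filename now encode the inference framework and the GPU type. An illustrative composition (all values below are made up; in the Space they come from the submission form):

```python
# Illustrative values only; in the Space they come from the submission form.
model = "my-org/my-model"
revision = "main"
precision = "float16"
private = False
weight_type = "Original"
inference_framework = "hf-chat"
gpu_type = "NVIDIA-A100-PCIe-80GB"

# Key checked against REQUESTED_MODELS (built by already_submitted_models):
dedup_key = f"{model}_{revision}_{precision}_{inference_framework}_{gpu_type}"

# Request file written under EVAL_REQUESTS_PATH/<user_name>:
model_path = model.split("/")[-1]
out_name = f"{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}_{gpu_type}.json"

print(dedup_key)  # my-org/my-model_main_float16_hf-chat_NVIDIA-A100-PCIe-80GB
print(out_name)   # my-model_eval_request_False_float16_Original_hf-chat_NVIDIA-A100-PCIe-80GB.json
```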