pminervini committed
Commit • 1793c69
1 Parent(s): bc4faee

update

Files changed: src/submission/check_validity.py (+15 -28)
src/submission/check_validity.py
CHANGED
@@ -7,11 +7,14 @@ from datetime import datetime, timedelta, timezone
 import huggingface_hub
 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
-
+
+from transformers import AutoConfig, AutoTokenizer
 from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
 
 from src.envs import HAS_HIGHER_RATE_LIMIT
 
+from typing import Optional
+
 
 # ht to @Wauplin, thank you for the snippet!
 # See https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/317
@@ -37,39 +40,23 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     return True, ""
 
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, Optional[str], Optional[AutoConfig]]:
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
         if test_tokenizer:
-
-
-
-
-
-
-
-            tokenizer_class = None
-            if tokenizer_class_candidate is not None:
-                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
-
-            if tokenizer_class is None:
-                return (
-                    False,
-                    f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
-                    None
-                )
+            try:
+                AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+            except ValueError as e:
+                return False, f"uses a tokenizer which is not in a transformers release: {e}", None
+            except Exception as e:
+                return False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None
         return True, None, config
 
-    except ValueError:
-        return (
-            False,
-            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
-        )
+    except ValueError as e:
+        return False, "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.", None
 
     except Exception as e:
-
-        return False, "was not found on hub!", None
+        return False, f"was not found on hub -- {str(e)}", None
 
 
 def get_model_size(model_info: ModelInfo, precision: str):
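After this commit, is_model_on_hub returns a three-element tuple (status, error message, loaded config) and exercises the tokenizer directly via AutoTokenizer.from_pretrained, which surfaces the actual loading failure instead of only checking whether the tokenizer class name is known to a released transformers version. A minimal usage sketch, assuming the Space's repo layout (src/submission/check_validity.py) and a purely illustrative model id that is not part of this commit:

# Minimal usage sketch of the updated helper; run from the Space's repo root.
# The model id below is only an example and is not part of this commit.
from src.submission.check_validity import is_model_on_hub

ok, error, config = is_model_on_hub(
    model_name="mistralai/Mistral-7B-v0.1",  # hypothetical submission
    revision="main",
    token=None,                # or a Hugging Face token with read access
    trust_remote_code=False,   # remote-code models are rejected via the ValueError branch
    test_tokenizer=True,       # also attempt AutoTokenizer.from_pretrained
)

if ok:
    # `config` is the AutoConfig instance loaded for the submitted revision
    print("accepted:", getattr(config, "model_type", None))
else:
    # `error` is worded as a suffix for "Model <name> ...", e.g. "was not found on hub -- ..."
    print("rejected:", error)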