navidved committed on
Commit
6d6662e
1 Parent(s): a6b0f6f

Update init.py

Browse files
Files changed (1) hide show
  1. init.py +33 -72
init.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  from constants import EVAL_REQUESTS_PATH
3
  from pathlib import Path
4
  from huggingface_hub import HfApi, Repository
@@ -8,67 +8,58 @@ QUEUE_REPO = os.environ.get("QUEUE_REPO")
8
  QUEUE_PATH = os.environ.get("QUEUE_PATH")
9
 
10
  hf_api = HfApi(
11
- endpoint="https://huggingface.co",
12
- token=TOKEN_HUB,
13
  )
14
- print(TOKEN_HUB)
15
  # Language code for Persian
16
  PERSIAN_LANGUAGE_CODE = "fa"
17
 
18
  def load_all_info_from_dataset_hub():
19
  eval_queue_repo = None
20
- requested_models = None
21
 
22
- passed = True
23
  if TOKEN_HUB is None:
24
- passed = False
25
- else:
26
- print("Pulling evaluation requests and results.")
27
 
28
- eval_queue_repo = Repository(
29
- local_dir=QUEUE_PATH,
30
- clone_from=QUEUE_REPO,
31
- use_auth_token=TOKEN_HUB,
32
- repo_type="dataset",
33
- )
34
- eval_queue_repo.git_pull()
35
-
36
- # Local directory where dataset repo is cloned + folder with eval requests
37
- directory = QUEUE_PATH / EVAL_REQUESTS_PATH
38
- requested_models = get_all_requested_models(directory)
39
- requested_models = [p.stem for p in requested_models]
40
 
41
- # Filter models to only include those supporting Persian language
42
- requested_models = filter_persian_models(requested_models)
43
-
44
- # Local directory where dataset repo is cloned
45
- csv_results = get_csv_with_results(QUEUE_PATH)
46
- if csv_results is None:
47
- passed = False
48
- if not passed:
49
- raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
50
 
51
- return eval_queue_repo, requested_models, csv_results
 
 
 
 
 
 
 
 
52
 
 
53
 
54
  def upload_file(requested_model_name, path_or_fileobj):
55
  dest_repo_file = Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name
56
  dest_repo_file = str(dest_repo_file)
57
  hf_api.upload_file(
58
- path_or_fileobj=path_or_fileobj,
59
- path_in_repo=str(dest_repo_file),
60
- repo_id=QUEUE_REPO,
61
- token=TOKEN_HUB,
62
- repo_type="dataset",
63
- commit_message=f"Add {requested_model_name} to eval queue")
64
-
65
 
66
  def get_all_requested_models(directory):
67
  directory = Path(directory)
68
  all_requested_models = list(directory.glob("*.txt"))
69
  return all_requested_models
70
 
71
-
72
  def get_csv_with_results(directory):
73
  directory = Path(directory)
74
  all_csv_files = list(directory.glob("*.csv"))
@@ -77,10 +68,9 @@ def get_csv_with_results(directory):
77
  return None
78
  return latest[0]
79
 
80
-
81
- def is_model_on_hub(model_name, revision="main") -> bool:
82
  try:
83
- model_name = model_name.replace(" ","")
84
  author = model_name.split("/")[0]
85
  model_id = model_name.split("/")[1]
86
  if len(author) == 0 or len(model_id) == 0:
@@ -90,7 +80,7 @@ def is_model_on_hub(model_name, revision="main") -> bool:
90
 
91
  try:
92
  models = list(hf_api.list_models(author=author, search=model_id))
93
- matched = [model_name for m in models if m.modelId == model_name]
94
  if len(matched) != 1:
95
  return False, "was not found on the hub!"
96
  else:
@@ -98,32 +88,3 @@ def is_model_on_hub(model_name, revision="main") -> bool:
98
  except Exception as e:
99
  print(f"Could not get the model from the hub.: {e}")
100
  return False, "was not found on hub!"
101
-
102
-
103
- def filter_persian_models(model_list):
104
- """
105
- Filters the provided list of models to include only those that support Persian (fa).
106
-
107
- Args:
108
- model_list (list): List of model names to filter.
109
-
110
- Returns:
111
- list: List of models that support Persian.
112
- """
113
- persian_models = []
114
- for model_name in model_list:
115
- try:
116
- # Get model information from Hugging Face Hub
117
- model_info = hf_api.model_info(model_name)
118
- languages = model_info.cardData.get("languages", [])
119
-
120
- # Check if Persian ('fa') is listed in the model's languages
121
- if PERSIAN_LANGUAGE_CODE in languages:
122
- persian_models.append(model_name)
123
- print(f"{model_name} supports Persian language.")
124
- else:
125
- print(f"{model_name} does not support Persian language. Skipping.")
126
- except Exception as e:
127
- print(f"Error fetching model info for {model_name}: {str(e)}")
128
-
129
- return persian_models
 
1
+ import os
2
  from constants import EVAL_REQUESTS_PATH
3
  from pathlib import Path
4
  from huggingface_hub import HfApi, Repository
 
8
  QUEUE_PATH = os.environ.get("QUEUE_PATH")
9
 
10
  hf_api = HfApi(
11
+ endpoint="https://huggingface.co",
12
+ token=TOKEN_HUB,
13
  )
14
+
15
  # Language code for Persian
16
  PERSIAN_LANGUAGE_CODE = "fa"
17
 
18
def load_all_info_from_dataset_hub():
    """Clone/refresh the evaluation-queue dataset repo and collect its contents.

    Returns:
        tuple: ``(eval_queue_repo, requested_models, csv_results)`` where
            ``eval_queue_repo`` is the local ``Repository`` clone,
            ``requested_models`` is a list of model names (stems of the
            ``*.txt`` request files), and ``csv_results`` is the path of the
            results CSV found in the clone.

    Raises:
        ValueError: if no Hugging Face token is configured, or no results
            CSV is present in the cloned repo.
    """
    eval_queue_repo = None
    requested_models = []

    if TOKEN_HUB is None:
        raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")

    print("Pulling evaluation requests and results.")

    eval_queue_repo = Repository(
        local_dir=QUEUE_PATH,
        clone_from=QUEUE_REPO,
        use_auth_token=TOKEN_HUB,
        repo_type="dataset",
    )
    eval_queue_repo.git_pull()

    # BUG FIX: QUEUE_PATH comes from os.environ.get(...) and is a plain str
    # (or None); "str / str" raises TypeError. Wrap it in Path before joining
    # with the eval-requests subfolder.
    directory = Path(QUEUE_PATH) / EVAL_REQUESTS_PATH
    requested_models = get_all_requested_models(directory)
    requested_models = [p.stem for p in requested_models]

    # The results CSV lives at the root of the cloned dataset repo.
    csv_results = get_csv_with_results(QUEUE_PATH)
    if csv_results is None:
        raise ValueError("CSV results file not found.")

    return eval_queue_repo, requested_models, csv_results
46
 
47
def upload_file(requested_model_name, path_or_fileobj):
    """Upload one eval-request file into the queue dataset repo on the Hub.

    Args:
        requested_model_name: model name used in the commit message.
        path_or_fileobj: file object (or path) to upload; its ``.name``
            becomes the filename inside the repo.
    """
    # Destination inside the repo: <EVAL_REQUESTS_PATH>/<original filename>.
    destination = str(Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name)
    hf_api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=destination,
        repo_id=QUEUE_REPO,
        token=TOKEN_HUB,
        repo_type="dataset",
        commit_message=f"Add {requested_model_name} to eval queue",
    )
 
57
 
58
  def get_all_requested_models(directory):
59
  directory = Path(directory)
60
  all_requested_models = list(directory.glob("*.txt"))
61
  return all_requested_models
62
 
 
63
  def get_csv_with_results(directory):
64
  directory = Path(directory)
65
  all_csv_files = list(directory.glob("*.csv"))
 
68
  return None
69
  return latest[0]
70
 
71
+ def is_model_on_hub(model_name, revision="main") -> (bool, str):
 
72
  try:
73
+ model_name = model_name.replace(" ", "")
74
  author = model_name.split("/")[0]
75
  model_id = model_name.split("/")[1]
76
  if len(author) == 0 or len(model_id) == 0:
 
80
 
81
  try:
82
  models = list(hf_api.list_models(author=author, search=model_id))
83
+ matched = [m.modelId for m in models if m.modelId == model_name]
84
  if len(matched) != 1:
85
  return False, "was not found on the hub!"
86
  else:
 
88
  except Exception as e:
89
  print(f"Could not get the model from the hub.: {e}")
90
  return False, "was not found on hub!"