eduagarcia committed on
Commit
59399bc
‒
1 Parent(s): 4717ca8

Make model text exhibit precision if there's more than one eval or precision is not float16 or bfloat16

Browse files
src/display/formatting.py CHANGED
@@ -24,7 +24,7 @@ def make_requests_clickable_model(model_name, json_path=None):
24
 
25
  return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
26
 
27
- def make_clickable_model(model_name, json_path=None, revision=None):
28
  link = f"https://huggingface.co/{model_name}"
29
 
30
  #details_model_name = model_name.replace("/", "__")
@@ -35,11 +35,20 @@ def make_clickable_model(model_name, json_path=None, revision=None):
35
  if json_path is not None:
36
  details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
37
 
 
38
  if revision is not None and revision != "" and revision != "main":
39
  if len(revision) > 12:
40
  revision = revision[:7]
41
- model_name += f" (rev: {revision})"
42
- return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
 
 
 
 
 
 
 
 
43
 
44
 
45
  def styled_error(error):
 
24
 
25
  return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
26
 
27
+ def make_clickable_model(model_name, json_path=None, revision=None, precision=None, num_evals_same_model=1):
28
  link = f"https://huggingface.co/{model_name}"
29
 
30
  #details_model_name = model_name.replace("/", "__")
 
35
  if json_path is not None:
36
  details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
37
 
38
+ posfix = ""
39
  if revision is not None and revision != "" and revision != "main":
40
  if len(revision) > 12:
41
  revision = revision[:7]
42
+ posfix += f" (rev: {revision})"
43
+ if precision is not None:
44
+ if num_evals_same_model == 1 and precision in ['float16', 'bfloat16']:
45
+ pass
46
+ else:
47
+ #if precision not in model_name:
48
+ posfix += f" [{precision}]"
49
+ posfix = posfix.strip()
50
+
51
+ return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑") + " " + posfix
52
 
53
 
54
  def styled_error(error):
src/leaderboard/read_evals.py CHANGED
@@ -43,6 +43,7 @@ class EvalResult:
43
  eval_time: float = 0.0
44
  original_benchmark_average: float = None
45
  hidden: bool = False # Do not show on the leaderboard
 
46
 
47
  @classmethod
48
  def init_from_json_file(self, json_filepath, is_original=False):
@@ -188,7 +189,7 @@ class EvalResult:
188
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
189
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
190
  AutoEvalColumn.architecture.name: self.architecture,
191
- AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=self.revision),
192
  AutoEvalColumn.dummy.name: self.full_model,
193
  AutoEvalColumn.revision.name: self.revision,
194
  AutoEvalColumn.average.name: average,
@@ -263,6 +264,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
263
  with open(dynamic_path) as f:
264
  dynamic_data = json.load(f)
265
 
 
266
  eval_results = {}
267
  for model_result_filepath in model_result_filepaths:
268
  # Creation of result
@@ -279,10 +281,21 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
279
  else:
280
  eval_results[eval_name] = eval_result
281
 
 
 
 
 
 
 
 
 
282
  results = []
283
  for v in eval_results.values():
284
  try:
285
  if v.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not v.hidden:
 
 
 
286
  v.to_dict() # we test if the dict version is complete
287
  results.append(v)
288
  except KeyError as e: # not all eval values present
 
43
  eval_time: float = 0.0
44
  original_benchmark_average: float = None
45
  hidden: bool = False # Do not show on the leaderboard
46
+ num_evals_model_rev: int = 1
47
 
48
  @classmethod
49
  def init_from_json_file(self, json_filepath, is_original=False):
 
189
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
190
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
191
  AutoEvalColumn.architecture.name: self.architecture,
192
+ AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=self.revision, precision=self.precision.value.name, num_evals_same_model=self.num_evals_model_rev),
193
  AutoEvalColumn.dummy.name: self.full_model,
194
  AutoEvalColumn.revision.name: self.revision,
195
  AutoEvalColumn.average.name: average,
 
264
  with open(dynamic_path) as f:
265
  dynamic_data = json.load(f)
266
 
267
+ count_model_rev = {}
268
  eval_results = {}
269
  for model_result_filepath in model_result_filepaths:
270
  # Creation of result
 
281
  else:
282
  eval_results[eval_name] = eval_result
283
 
284
+ #count model_revision to display precision if duplicate
285
+ if eval_result.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not eval_result.hidden:
286
+ model_rev_key = f"{eval_result.full_model}_{eval_result.revision}"
287
+ if model_rev_key not in count_model_rev:
288
+ count_model_rev[model_rev_key] = 1
289
+ else:
290
+ count_model_rev[model_rev_key] += 1
291
+
292
  results = []
293
  for v in eval_results.values():
294
  try:
295
  if v.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not v.hidden:
296
+ model_rev_key = f"{v.full_model}_{v.revision}"
297
+ v.num_evals_model_rev = count_model_rev[model_rev_key]
298
+
299
  v.to_dict() # we test if the dict version is complete
300
  results.append(v)
301
  except KeyError as e: # not all eval values present