open_pt_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

eduagarcia committed on Apr 21

Commit

59399bc

•

1 Parent(s): 4717ca8

Make the model text display the precision if there's more than one eval or the precision is not float16 or bfloat16

Browse files

Files changed (2) hide show

src/display/formatting.py +12 -3
src/leaderboard/read_evals.py +14 -1

src/display/formatting.py CHANGED Viewed

@@ -24,7 +24,7 @@ def make_requests_clickable_model(model_name, json_path=None):
     return model_hyperlink(link, model_name) + "  " + model_hyperlink(details_link, "📑")
-def make_clickable_model(model_name, json_path=None, revision=None):
     link = f"https://huggingface.co/{model_name}"
     #details_model_name = model_name.replace("/", "__")
@@ -35,11 +35,20 @@ def make_clickable_model(model_name, json_path=None, revision=None):
         if json_path is not None:
             details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
     if revision is not None and revision != "" and revision != "main":
         if len(revision) > 12:
             revision = revision[:7]
-        model_name += f" (rev: {revision})"
-    return model_hyperlink(link, model_name) + "  " + model_hyperlink(details_link, "📑")
 def styled_error(error):

     return model_hyperlink(link, model_name) + "  " + model_hyperlink(details_link, "📑")
+def make_clickable_model(model_name, json_path=None, revision=None, precision=None, num_evals_same_model=1):
     link = f"https://huggingface.co/{model_name}"
     #details_model_name = model_name.replace("/", "__")
         if json_path is not None:
             details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
+    posfix = ""
     if revision is not None and revision != "" and revision != "main":
         if len(revision) > 12:
             revision = revision[:7]
+        posfix += f" (rev: {revision})"
+    if precision is not None:
+        if num_evals_same_model == 1 and precision in ['float16', 'bfloat16']:
+            pass
+        else:
+            #if precision not in model_name:
+            posfix += f" [{precision}]"
+    posfix = posfix.strip()
+    return model_hyperlink(link, model_name) + "  " + model_hyperlink(details_link, "📑") + " " + posfix
 def styled_error(error):

src/leaderboard/read_evals.py CHANGED Viewed

@@ -43,6 +43,7 @@ class EvalResult:
     eval_time: float = 0.0
     original_benchmark_average: float = None
     hidden: bool = False # Do not show on the leaderboard
     @classmethod
     def init_from_json_file(self, json_filepath, is_original=False):
@@ -188,7 +189,7 @@ class EvalResult:
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
-            AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=self.revision),
             AutoEvalColumn.dummy.name: self.full_model,
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
@@ -263,6 +264,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
@@ -279,10 +281,21 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
         else:
             eval_results[eval_name] = eval_result
     results = []
     for v in eval_results.values():
         try:
             if v.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not v.hidden:
                 v.to_dict() # we test if the dict version is complete
                 results.append(v)
         except KeyError as e:  # not all eval values present

     eval_time: float = 0.0
     original_benchmark_average: float = None
     hidden: bool = False # Do not show on the leaderboard
+    num_evals_model_rev: int = 1
     @classmethod
     def init_from_json_file(self, json_filepath, is_original=False):
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
+            AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=self.revision, precision=self.precision.value.name, num_evals_same_model=self.num_evals_model_rev),
             AutoEvalColumn.dummy.name: self.full_model,
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
+    count_model_rev = {}
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         else:
             eval_results[eval_name] = eval_result
+            #count model_revision to display precision if duplicate
+            if eval_result.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not eval_result.hidden:
+                model_rev_key = f"{eval_result.full_model}_{eval_result.revision}"
+                if model_rev_key not in count_model_rev:
+                    count_model_rev[model_rev_key] = 1
+                else:
+                    count_model_rev[model_rev_key] += 1
     results = []
     for v in eval_results.values():
         try:
             if v.status in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"] and not v.hidden:
+                model_rev_key = f"{v.full_model}_{v.revision}"
+                v.num_evals_model_rev = count_model_rev[model_rev_key]
                 v.to_dict() # we test if the dict version is complete
                 results.append(v)
         except KeyError as e:  # not all eval values present