Clémentine committed on
Commit
20b060e
·
1 Parent(s): c841f87

adding merge check - super slow but at least info is displayed

Browse files
src/display/utils.py CHANGED
@@ -46,6 +46,7 @@ auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type",
46
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
47
  auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
48
  auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
 
49
  auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
50
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
51
  auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
@@ -72,6 +73,7 @@ baseline_row = {
72
  AutoEvalColumn.model.name: "<p>Baseline</p>",
73
  AutoEvalColumn.revision.name: "N/A",
74
  AutoEvalColumn.precision.name: None,
 
75
  AutoEvalColumn.average.name: 31.0,
76
  AutoEvalColumn.arc.name: 25.0,
77
  AutoEvalColumn.hellaswag.name: 25.0,
@@ -97,6 +99,7 @@ human_baseline_row = {
97
  AutoEvalColumn.revision.name: "N/A",
98
  AutoEvalColumn.precision.name: None,
99
  AutoEvalColumn.average.name: 92.75,
 
100
  AutoEvalColumn.arc.name: 80.0,
101
  AutoEvalColumn.hellaswag.name: 95.0,
102
  AutoEvalColumn.mmlu.name: 89.8,
 
46
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
47
  auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
48
  auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
49
+ auto_eval_column_dict.append(["merge", ColumnContent, ColumnContent("Merged", "bool", False)])
50
  auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
51
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
52
  auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
 
73
  AutoEvalColumn.model.name: "<p>Baseline</p>",
74
  AutoEvalColumn.revision.name: "N/A",
75
  AutoEvalColumn.precision.name: None,
76
+ AutoEvalColumn.merge.name: False,
77
  AutoEvalColumn.average.name: 31.0,
78
  AutoEvalColumn.arc.name: 25.0,
79
  AutoEvalColumn.hellaswag.name: 25.0,
 
99
  AutoEvalColumn.revision.name: "N/A",
100
  AutoEvalColumn.precision.name: None,
101
  AutoEvalColumn.average.name: 92.75,
102
+ AutoEvalColumn.merge.name: False,
103
  AutoEvalColumn.arc.name: 80.0,
104
  AutoEvalColumn.hellaswag.name: 95.0,
105
  AutoEvalColumn.mmlu.name: 89.8,
src/leaderboard/read_evals.py CHANGED
@@ -5,10 +5,10 @@ import os
5
  from dataclasses import dataclass
6
 
7
  import dateutil
8
- from datetime import datetime
9
- from transformers import AutoConfig
10
  import numpy as np
11
 
 
 
12
  from src.display.formatting import make_clickable_model
13
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
14
  from src.submission.check_validity import is_model_on_hub
@@ -32,6 +32,7 @@ class EvalResult:
32
  num_params: int = 0
33
  date: str = "" # submission date of request file
34
  still_on_hub: bool = False
 
35
 
36
  @classmethod
37
  def init_from_json_file(self, json_filepath):
@@ -59,6 +60,11 @@ class EvalResult:
59
  result_key = f"{org}_{model}_{precision.value.name}"
60
  full_model = "/".join(org_and_model)
61
 
 
 
 
 
 
62
  still_on_hub, error, model_config = is_model_on_hub(
63
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
64
  )
@@ -105,7 +111,8 @@ class EvalResult:
105
  precision=precision,
106
  revision= config.get("model_sha", ""),
107
  still_on_hub=still_on_hub,
108
- architecture=architecture
 
109
  )
110
 
111
  def update_with_request_file(self, requests_path):
@@ -131,6 +138,7 @@ class EvalResult:
131
  "eval_name": self.eval_name, # not a column, just a save name,
132
  AutoEvalColumn.precision.name: self.precision.value.name,
133
  AutoEvalColumn.model_type.name: self.model_type.value.name,
 
134
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
135
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
136
  AutoEvalColumn.architecture.name: self.architecture,
 
5
  from dataclasses import dataclass
6
 
7
  import dateutil
 
 
8
  import numpy as np
9
 
10
+ from huggingface_hub import ModelCard
11
+
12
  from src.display.formatting import make_clickable_model
13
  from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
14
  from src.submission.check_validity import is_model_on_hub
 
32
  num_params: int = 0
33
  date: str = "" # submission date of request file
34
  still_on_hub: bool = False
35
+ merge: bool = False
36
 
37
  @classmethod
38
  def init_from_json_file(self, json_filepath):
 
60
  result_key = f"{org}_{model}_{precision.value.name}"
61
  full_model = "/".join(org_and_model)
62
 
63
+ try:
64
+ merge = any(t in ["merge", "mergedlm"] for t in ModelCard.load(full_model).data.tags)
65
+ except Exception:
66
+ merge = False
67
+
68
  still_on_hub, error, model_config = is_model_on_hub(
69
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
70
  )
 
111
  precision=precision,
112
  revision= config.get("model_sha", ""),
113
  still_on_hub=still_on_hub,
114
+ architecture=architecture,
115
+ merge=merge
116
  )
117
 
118
  def update_with_request_file(self, requests_path):
 
138
  "eval_name": self.eval_name, # not a column, just a save name,
139
  AutoEvalColumn.precision.name: self.precision.value.name,
140
  AutoEvalColumn.model_type.name: self.model_type.value.name,
141
+ AutoEvalColumn.merge.name: self.merge,
142
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
143
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
144
  AutoEvalColumn.architecture.name: self.architecture,