tathagataraha committed
Commit b3eff40 · 1 Parent(s): 09b313f

[ADD] Submit form, upload requests to requests dataset

.gitignore CHANGED
@@ -12,4 +12,6 @@ eval-queue-bk/
 eval-results-bk/
 eval-queue-local/
 eval-results-local/
+medic-harness-requests/
+medic-harness-results/
 logs/
app.py CHANGED
@@ -361,7 +361,7 @@ with demo:
             gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
             # gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
             gr.Markdown(LLM_BENCHMARKS_TEXT_3, elem_classes="markdown-text")
-
+
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
             with gr.Column():
                 with gr.Row():
@@ -407,16 +407,8 @@
 
                 with gr.Row():
                     with gr.Column():
-
                         model_name_textbox = gr.Textbox(label="Model name")
-
-                        revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-
-                        model_arch = gr.Radio(
-                            choices=[t.to_str(" : ") for t in ModelArch if t != ModelArch.Unknown],
-                            label="Model Architecture",
-                        )
-
+                        revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
                         model_type = gr.Dropdown(
                             choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                             label="Model type",
@@ -426,29 +418,32 @@
                         )
 
                     with gr.Column():
-                        label_normalization_map = gr.Textbox(lines=6, label="Label Normalization Map", placeholder=PLACEHOLDER_DATASET_WISE_NORMALIZATION_CONFIG)
-                        gliner_threshold = gr.Textbox(label="Threshold for GLiNER models", visible=False)
-                        gliner_tokenizer_bool = gr.Radio(
-                            choices=["True", "False"],
-                            label="Load GLiNER Tokenizer",
-                            visible=False
+                        precision = gr.Dropdown(
+                            choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                            label="Precision",
+                            multiselect=False,
+                            value="float16",
+                            interactive=True,
                         )
-                        prompt_name = gr.Dropdown(
-                            choices=[prompt_template.value for prompt_template in PromptTemplateName],
-                            label="Prompt for generation",
+                        weight_type = gr.Dropdown(
+                            choices=[i.value.name for i in WeightType],
+                            label="Weights type",
                             multiselect=False,
-                            value="HTML Highlighted Spans",
+                            value=WeightType.Original.value.name,
                             interactive=True,
-                            visible=False
-                        )# should be a dropdown
-
-                        # parsing_function - this is tied to the prompt & therefore does not need to be specified
-                        # generation_parameters = gr.Textbox(label="Generation params in json format") just default for now
-
-                        model_arch.change(fn=change_submit_request_form, inputs=model_arch, outputs=[
-                            gliner_threshold,
-                            gliner_tokenizer_bool,
-                            prompt_name])
+                        )
+                        base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)", interactive=False)
+                        with gr.Row():
+                            domain_specific_toggle = gr.Checkbox(
+                                label="Domain specific",
+                                value=False,
+                                info="Is your model medically oriented?",
+                            )
+                            chat_template_toggle = gr.Checkbox(
+                                label="Use chat template",
+                                value=False,
+                                info="Is your model a chat model?",
+                            )
 
                 submit_button = gr.Button("Submit Eval")
                 submission_result = gr.Markdown()
@@ -456,15 +451,13 @@
                     add_new_eval,
                     [
                         model_name_textbox,
-                        # base_model_name_textbox,
+                        base_model_name_textbox,
                         revision_name_textbox,
-                        model_arch,
-                        label_normalization_map,
-                        gliner_threshold,
-                        gliner_tokenizer_bool,
-                        prompt_name,
-                        # weight_type,
                         model_type,
+                        domain_specific_toggle,
+                        chat_template_toggle,
+                        precision,
+                        weight_type
                     ],
                     submission_result,
                 )
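Net effect on the submit tab: the GLiNER- and prompt-specific controls are dropped, and the form now collects the standard leaderboard fields. A minimal, self-contained sketch of the resulting wiring, assuming simplified stand-ins for the repo's enums and handler (the real definitions live in src/display/utils.py and src/submission/submit.py):

# Sketch only: stand-in enum and handler; not the repo's actual definitions.
from enum import Enum

import gradio as gr


class WeightType(Enum):
    Original = "Original"
    Delta = "Delta"
    Adapter = "Adapter"


def add_new_eval(model, base_model, revision, model_type,
                 domain_specific, chat_template, precision, weight_type):
    # Stand-in for src.submission.submit.add_new_eval.
    return f"Queued {model} @ {revision} ({precision}, {weight_type})"


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
            revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
            model_type = gr.Dropdown(
                choices=["pretrained", "instruction-tuned"], label="Model type"
            )
        with gr.Column():
            precision = gr.Dropdown(
                choices=["float16", "bfloat16"], label="Precision", value="float16"
            )
            weight_type = gr.Dropdown(
                choices=[w.name for w in WeightType], label="Weights type", value="Original"
            )
            base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
            with gr.Row():
                domain_specific_toggle = gr.Checkbox(label="Domain specific", value=False)
                chat_template_toggle = gr.Checkbox(label="Use chat template", value=False)

    submit_button = gr.Button("Submit Eval")
    submission_result = gr.Markdown()
    # Gradio matches inputs to the handler positionally, so this list must
    # stay in the same order as add_new_eval's parameters.
    submit_button.click(
        add_new_eval,
        [model_name_textbox, base_model_name_textbox, revision_name_textbox,
         model_type, domain_specific_toggle, chat_template_toggle, precision, weight_type],
        submission_result,
    )

if __name__ == "__main__":
    demo.launch()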
medic-harness-results/meta-llama/Llama-3.1-8B-Instruct/results_2024-07-24T15:26:36Z.json CHANGED
@@ -3,7 +3,7 @@
     "model_name": "meta-llama/Llama-3.1-8B-Instruct",
     "revision": "main",
     "submitted_time": "2024-07-24 14:33:56+00:00",
-    "model_type": "instruct-tuned",
+    "model_type": "instruction-tuned",
     "num_params": 8000000000,
     "private": false,
     "evaluated_time": "2024-07-24T15:26:36Z"
src/display/utils.py CHANGED
@@ -58,9 +58,9 @@ class EvalQueueColumn: # Queue column
     model = ColumnContent("model", "markdown", True)
     revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
-    architecture = ColumnContent("model_architecture", "bool", True)
-    # precision = ColumnContent("precision", "str", True)
-    # weight_type = ColumnContent("weight_type", "str", "Original")
+    model_type = ColumnContent("model_type", "str", True)
+    precision = ColumnContent("precision", "str", True)
+    weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
 
@@ -73,12 +73,13 @@ class ModelDetails:
 
 
 class ModelType(Enum):
-    ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
-    FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
+    # ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
+    # FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
     PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    # IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    # RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    # FT = ModelDetails(name="fine-tuned", symbol="🔶")
+    # DS = ModelDetails(name="domain-specific", symbol="➕")
+    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    RL = ModelDetails(name="preference-tuned", symbol="🟦")
     Unknown = ModelDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
@@ -86,18 +87,20 @@ class ModelType(Enum):
 
     @staticmethod
     def from_str(type):
-        if "zero-shot" in type or "⚫" in type:
-            return ModelType.ZEROSHOT
-        if "fine-tuned" in type or "⚪" in type:
-            return ModelType.FINETUNED
+        # if "zero-shot" in type or "⚫" in type:
+        #     return ModelType.ZEROSHOT
+        # if "fine-tuned" in type or "⚪" in type:
+        #     return ModelType.FINETUNED
         # if "fine-tuned" in type or "🔶" in type:
         #     return ModelType.FT
-        # if "pretrained" in type or "🟢" in type:
-        #     return ModelType.PT
+        if "pretrained" in type or "🟢" in type:
+            return ModelType.PT
         # if "RL-tuned" in type or "🟦" in type:
         #     return ModelType.RL
-        # if "instruction-tuned" in type or "⭕" in type:
-        #     return ModelType.IFT
+        if "instruction-tuned" in type or "⭕" in type:
+            return ModelType.IFT
+        # if "domain-specific" in type or "➕" in type:
+        #     return ModelType.DS
         return ModelType.Unknown
 
 class ModelArch(Enum):
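Since the queue page renders these labels via to_str and parses them back via from_str, the two must stay in sync. A quick, self-contained round-trip check mirroring the reworked enum (ModelDetails is reduced to name/symbol here, and the to_str body is an assumption based on the usual leaderboard template, since the diff does not show it):

# Round-trip check for the reworked ModelType; simplified stand-ins only.
from dataclasses import dataclass
from enum import Enum


@dataclass(frozen=True)
class ModelDetails:
    name: str
    symbol: str = ""


class ModelType(Enum):
    PT = ModelDetails(name="pretrained", symbol="🟢")
    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
    RL = ModelDetails(name="preference-tuned", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        # Assumed body: "<symbol><separator><name>", matching the dropdown labels.
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        if "pretrained" in type or "🟢" in type:
            return ModelType.PT
        if "instruction-tuned" in type or "⭕" in type:
            return ModelType.IFT
        return ModelType.Unknown


for t in (ModelType.PT, ModelType.IFT):
    assert ModelType.from_str(t.to_str(" : ")) is t

# RL has no active from_str branch in this commit, so it round-trips to Unknown:
assert ModelType.from_str(ModelType.RL.to_str(" : ")) is ModelType.Unknown

Note the asymmetry the check surfaces: RL is uncommented with the new name "preference-tuned", but its from_str branch stays commented out and still matches "RL-tuned", so preference-tuned submissions parse as Unknown.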
src/populate.py CHANGED
@@ -29,16 +29,13 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
-
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)
             with open(file_path) as fp:
                 data = json.load(fp)
-
             data[EvalQueueColumn.model.name] = make_clickable_model(data["model_name"])
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
src/submission/submit.py CHANGED
@@ -42,16 +42,13 @@ PLACEHOLDER_DATASET_WISE_NORMALIZATION_CONFIG = """{
 
 def add_new_eval(
     model: str,
-    # base_model: str,
+    base_model: str,
     revision: str,
-    # precision: str,
-    # weight_type: str,
-    model_arch: str,
-    label_normalization_map: str,
-    gliner_threshold:str,
-    gliner_tokenizer_bool:str,
-    prompt_template_name:str,
     model_type: str,
+    domain_specific: bool,
+    chat_template: bool,
+    precision: str,
+    weight_type: str,
 ):
     """
     Saves request if valid else returns the error.
@@ -85,22 +82,16 @@
     if revision == "":
         revision = "main"
 
-    # # Is the model on the hub?
-    # if weight_type in ["Delta", "Adapter"]:
-    #     base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
-    #     if not base_model_on_hub:
-    #         return styled_error(f'Base model "{base_model}" {error}')
+    # Is the model on the hub?
+    if weight_type in ["Delta", "Adapter"]:
+        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        if not base_model_on_hub:
+            return styled_error(f'Base model "{base_model}" {error}')
 
-    if not model_arch == "GLiNER Encoder":
+    if not weight_type == "Adapter":
         model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
-    else:
-        model_name_matches = list(API.list_models(model_name=model))
-        if len(model_name_matches) < 1:
-            return styled_error(f'Model "{model}" does not exist on the hub!')
-        elif model_name_matches[0].id != model:
-            return styled_error(f'Model "{model}" does not exist on the hub! There might be a typo in the name')
 
 
     # Is the model info correctly filled?
@@ -122,39 +113,15 @@
         return styled_error(error_msg)
 
     # Verify the inference config now
-    try:
-        label_normalization_map = ast.literal_eval(label_normalization_map)
-    except Exception as e:
-        return styled_error("Please enter a valid json for the labe; normalization map")
-
-    inference_config = {
-        # "model_arch" : model_arch,
-        "label_normalization_map": label_normalization_map,
-    }
+    # try:
+    #     label_normalization_map = ast.literal_eval(label_normalization_map)
+    # except Exception as e:
+    #     return styled_error("Please enter a valid json for the labe; normalization map")
 
-    match model_arch:
-        case "Encoder":
-            pass
-        case "Decoder":
-            if not prompt_template_name in [prompt_template.value for prompt_template in PromptTemplateName]:
-                return styled_error("Prompt template name is invalid")
-            inference_config = {
-                **inference_config,
-                "prompt_template_identifier": prompt_template_name,
-            }
-        case "GLiNER Encoder":
-            try:
-                gliner_threshold = float(gliner_threshold)
-                gliner_tokenizer_bool = ast.literal_eval(gliner_tokenizer_bool)
-                inference_config = {
-                    **inference_config,
-                    "gliner_threshold": gliner_threshold,
-                    "gliner_tokenizer_bool" : gliner_tokenizer_bool
-                }
-            except Exception as e:
-                return styled_error("Please enter a valid float for the threshold")
-        case _:
-            return styled_error("Model Architecture is invalid")
+    # inference_config = {
+    #     # "model_arch" : model_arch,
+    #     "label_normalization_map": label_normalization_map,
+    # }
 
     # Seems good, creating the eval
     print("Adding new eval")
@@ -162,11 +129,10 @@
 
     eval_entry = {
         "model_name": model,
-        # "base_model": base_model,
+        "base_model": base_model,
         "revision": revision,
-        # "precision": precision,
-        # "weight_type": weight_type,
-        "model_architecture": model_arch,
+        "precision": precision,
+        "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
         "model_type": model_type,
@@ -174,18 +140,17 @@
         "num_params": model_size,
         "license": license,
        "private": False,
-        "inference_config":inference_config,
     }
 
     # Check for duplicate submission
 
-    if f"{model}_{revision}" in REQUESTED_MODELS:
+    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
         return styled_warning("This model has been already submitted. Add the revision if the model has been updated.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_{revision}_eval_request.json"
+    out_path = f"{OUT_DIR}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
@@ -193,7 +158,7 @@
     print("Uploading eval file")
     API.upload_file(
         path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-queue/")[1],
+        path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
         repo_id=QUEUE_REPO,
         repo_type="dataset",
         commit_message=f"Add {model} to eval queue",
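With precision folded into the duplicate key, and precision plus weight type into the file name, the same model can now be queued once per precision/weight-type combination. An illustrative sketch of the request file this path produces; the derivation of user_name and model_path from the repo id, and the local queue directory name, are assumptions rather than code shown in this commit (num_params, license, and the toggles are omitted for brevity):

# Illustrative only: reconstructs the request-file naming implied by the diff.
import json
import os
from datetime import datetime, timezone

EVAL_REQUESTS_PATH = "medic-harness-requests"  # assumed local checkout of the queue dataset

model = "meta-llama/Llama-3.1-8B-Instruct"
user_name, model_path = model.split("/")  # assumed "org/model" split
revision, precision, weight_type = "main", "float16", "Original"

eval_entry = {
    "model_name": model,
    "base_model": "",
    "revision": revision,
    "precision": precision,
    "weight_type": weight_type,
    "status": "PENDING",
    "submitted_time": datetime.now(timezone.utc).isoformat(sep=" ", timespec="seconds"),
    "model_type": "instruction-tuned",
    "private": False,
}

OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
os.makedirs(OUT_DIR, exist_ok=True)
out_path = f"{OUT_DIR}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
with open(out_path, "w") as f:
    f.write(json.dumps(eval_entry))

# path_in_repo drops the local prefix, e.g.
# meta-llama/Llama-3.1-8B-Instruct_main_float16_Original_eval_request.json
print(out_path.split(f"{EVAL_REQUESTS_PATH}/")[1])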