Spaces:
Running
Running
tathagataraha
committed on
Commit
·
b3eff40
1
Parent(s):
09b313f
[ADD] Submit form, upload requests to requests dataset
Browse files- .gitignore +2 -0
- app.py +30 -37
- medic-harness-results/meta-llama/Llama-3.1-8B-Instruct/results_2024-07-24T15:26:36Z.json +1 -1
- src/display/utils.py +19 -16
- src/populate.py +0 -3
- src/submission/submit.py +25 -60
.gitignore
CHANGED
@@ -12,4 +12,6 @@ eval-queue-bk/
|
|
12 |
eval-results-bk/
|
13 |
eval-queue-local/
|
14 |
eval-results-local/
|
|
|
|
|
15 |
logs/
|
|
|
12 |
eval-results-bk/
|
13 |
eval-queue-local/
|
14 |
eval-results-local/
|
15 |
+
medic-harness-requests/
|
16 |
+
medic-harness-results/
|
17 |
logs/
|
app.py
CHANGED
@@ -361,7 +361,7 @@ with demo:
|
|
361 |
gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
|
362 |
# gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
|
363 |
gr.Markdown(LLM_BENCHMARKS_TEXT_3, elem_classes="markdown-text")
|
364 |
-
|
365 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
|
366 |
with gr.Column():
|
367 |
with gr.Row():
|
@@ -407,16 +407,8 @@ with demo:
|
|
407 |
|
408 |
with gr.Row():
|
409 |
with gr.Column():
|
410 |
-
|
411 |
model_name_textbox = gr.Textbox(label="Model name")
|
412 |
-
|
413 |
-
revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
414 |
-
|
415 |
-
model_arch = gr.Radio(
|
416 |
-
choices=[t.to_str(" : ") for t in ModelArch if t != ModelArch.Unknown],
|
417 |
-
label="Model Architecture",
|
418 |
-
)
|
419 |
-
|
420 |
model_type = gr.Dropdown(
|
421 |
choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
|
422 |
label="Model type",
|
@@ -426,29 +418,32 @@ with demo:
|
|
426 |
)
|
427 |
|
428 |
with gr.Column():
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
)
|
436 |
-
|
437 |
-
choices=[
|
438 |
-
label="
|
439 |
multiselect=False,
|
440 |
-
value=
|
441 |
interactive=True,
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
|
|
|
|
|
|
452 |
|
453 |
submit_button = gr.Button("Submit Eval")
|
454 |
submission_result = gr.Markdown()
|
@@ -456,15 +451,13 @@ with demo:
|
|
456 |
add_new_eval,
|
457 |
[
|
458 |
model_name_textbox,
|
459 |
-
|
460 |
revision_name_textbox,
|
461 |
-
model_arch,
|
462 |
-
label_normalization_map,
|
463 |
-
gliner_threshold,
|
464 |
-
gliner_tokenizer_bool,
|
465 |
-
prompt_name,
|
466 |
-
# weight_type,
|
467 |
model_type,
|
|
|
|
|
|
|
|
|
468 |
],
|
469 |
submission_result,
|
470 |
)
|
|
|
361 |
gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
|
362 |
# gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
|
363 |
gr.Markdown(LLM_BENCHMARKS_TEXT_3, elem_classes="markdown-text")
|
364 |
+
|
365 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
|
366 |
with gr.Column():
|
367 |
with gr.Row():
|
|
|
407 |
|
408 |
with gr.Row():
|
409 |
with gr.Column():
|
|
|
410 |
model_name_textbox = gr.Textbox(label="Model name")
|
411 |
+
revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
model_type = gr.Dropdown(
|
413 |
choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
|
414 |
label="Model type",
|
|
|
418 |
)
|
419 |
|
420 |
with gr.Column():
|
421 |
+
precision = gr.Dropdown(
|
422 |
+
choices=[i.value.name for i in Precision if i != Precision.Unknown],
|
423 |
+
label="Precision",
|
424 |
+
multiselect=False,
|
425 |
+
value="float16",
|
426 |
+
interactive=True,
|
427 |
)
|
428 |
+
weight_type = gr.Dropdown(
|
429 |
+
choices=[i.value.name for i in WeightType],
|
430 |
+
label="Weights type",
|
431 |
multiselect=False,
|
432 |
+
value=WeightType.Original.value.name,
|
433 |
interactive=True,
|
434 |
+
)
|
435 |
+
base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)", interactive=False)
|
436 |
+
with gr.Row():
|
437 |
+
domain_specific_toggle = gr.Checkbox(
|
438 |
+
label="Domain specific",
|
439 |
+
value=False,
|
440 |
+
info="Is your model medically oriented?",
|
441 |
+
)
|
442 |
+
chat_template_toggle = gr.Checkbox(
|
443 |
+
label="Use chat template",
|
444 |
+
value=False,
|
445 |
+
info="Is your model a chat model?",
|
446 |
+
)
|
447 |
|
448 |
submit_button = gr.Button("Submit Eval")
|
449 |
submission_result = gr.Markdown()
|
|
|
451 |
add_new_eval,
|
452 |
[
|
453 |
model_name_textbox,
|
454 |
+
base_model_name_textbox,
|
455 |
revision_name_textbox,
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
model_type,
|
457 |
+
domain_specific_toggle,
|
458 |
+
chat_template_toggle,
|
459 |
+
precision,
|
460 |
+
weight_type
|
461 |
],
|
462 |
submission_result,
|
463 |
)
|
medic-harness-results/meta-llama/Llama-3.1-8B-Instruct/results_2024-07-24T15:26:36Z.json
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
"model_name": "meta-llama/Llama-3.1-8B-Instruct",
|
4 |
"revision": "main",
|
5 |
"submitted_time": "2024-07-24 14:33:56+00:00",
|
6 |
-
"model_type": "
|
7 |
"num_params": 8000000000,
|
8 |
"private": false,
|
9 |
"evaluated_time": "2024-07-24T15:26:36Z"
|
|
|
3 |
"model_name": "meta-llama/Llama-3.1-8B-Instruct",
|
4 |
"revision": "main",
|
5 |
"submitted_time": "2024-07-24 14:33:56+00:00",
|
6 |
+
"model_type": "instruction-tuned",
|
7 |
"num_params": 8000000000,
|
8 |
"private": false,
|
9 |
"evaluated_time": "2024-07-24T15:26:36Z"
|
src/display/utils.py
CHANGED
@@ -58,9 +58,9 @@ class EvalQueueColumn: # Queue column
|
|
58 |
model = ColumnContent("model", "markdown", True)
|
59 |
revision = ColumnContent("revision", "str", True)
|
60 |
private = ColumnContent("private", "bool", True)
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
status = ColumnContent("status", "str", True)
|
65 |
|
66 |
|
@@ -73,12 +73,13 @@ class ModelDetails:
|
|
73 |
|
74 |
|
75 |
class ModelType(Enum):
|
76 |
-
ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
|
77 |
-
FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
|
78 |
PT = ModelDetails(name="pretrained", symbol="🟢")
|
79 |
-
FT = ModelDetails(name="fine-tuned", symbol="🔶")
|
80 |
-
#
|
81 |
-
|
|
|
82 |
Unknown = ModelDetails(name="", symbol="?")
|
83 |
|
84 |
def to_str(self, separator=" "):
|
@@ -86,18 +87,20 @@ class ModelType(Enum):
|
|
86 |
|
87 |
@staticmethod
|
88 |
def from_str(type):
|
89 |
-
if "zero-shot" in type or "⚫" in type:
|
90 |
-
|
91 |
-
if "fine-tuned" in type or "⚪" in type:
|
92 |
-
|
93 |
# if "fine-tuned" in type or "🔶" in type:
|
94 |
# return ModelType.FT
|
95 |
-
|
96 |
-
|
97 |
# if "RL-tuned" in type or "🟦" in type:
|
98 |
# return ModelType.RL
|
99 |
-
|
100 |
-
|
|
|
|
|
101 |
return ModelType.Unknown
|
102 |
|
103 |
class ModelArch(Enum):
|
|
|
58 |
model = ColumnContent("model", "markdown", True)
|
59 |
revision = ColumnContent("revision", "str", True)
|
60 |
private = ColumnContent("private", "bool", True)
|
61 |
+
model_type = ColumnContent("model_type", "str", True)
|
62 |
+
precision = ColumnContent("precision", "str", True)
|
63 |
+
weight_type = ColumnContent("weight_type", "str", "Original")
|
64 |
status = ColumnContent("status", "str", True)
|
65 |
|
66 |
|
|
|
73 |
|
74 |
|
75 |
class ModelType(Enum):
|
76 |
+
# ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
|
77 |
+
# FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
|
78 |
PT = ModelDetails(name="pretrained", symbol="🟢")
|
79 |
+
# FT = ModelDetails(name="fine-tuned", symbol="🔶")
|
80 |
+
# DS = ModelDetails(name="domain-specific", symbol="➕")
|
81 |
+
IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
82 |
+
RL = ModelDetails(name="preference-tuned", symbol="🟦")
|
83 |
Unknown = ModelDetails(name="", symbol="?")
|
84 |
|
85 |
def to_str(self, separator=" "):
|
|
|
87 |
|
88 |
@staticmethod
|
89 |
def from_str(type):
|
90 |
+
# if "zero-shot" in type or "⚫" in type:
|
91 |
+
# return ModelType.ZEROSHOT
|
92 |
+
# if "fine-tuned" in type or "⚪" in type:
|
93 |
+
# return ModelType.FINETUNED
|
94 |
# if "fine-tuned" in type or "🔶" in type:
|
95 |
# return ModelType.FT
|
96 |
+
if "pretrained" in type or "🟢" in type:
|
97 |
+
return ModelType.PT
|
98 |
# if "RL-tuned" in type or "🟦" in type:
|
99 |
# return ModelType.RL
|
100 |
+
if "instruction-tuned" in type or "⭕" in type:
|
101 |
+
return ModelType.IFT
|
102 |
+
# if "domain-specific" in type or "➕" in type:
|
103 |
+
# return ModelType.DS
|
104 |
return ModelType.Unknown
|
105 |
|
106 |
class ModelArch(Enum):
|
src/populate.py
CHANGED
@@ -29,16 +29,13 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
29 |
"""Creates the different dataframes for the evaluation queues requestes"""
|
30 |
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
31 |
all_evals = []
|
32 |
-
|
33 |
for entry in entries:
|
34 |
if ".json" in entry:
|
35 |
file_path = os.path.join(save_path, entry)
|
36 |
with open(file_path) as fp:
|
37 |
data = json.load(fp)
|
38 |
-
|
39 |
data[EvalQueueColumn.model.name] = make_clickable_model(data["model_name"])
|
40 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
41 |
-
|
42 |
all_evals.append(data)
|
43 |
elif ".md" not in entry:
|
44 |
# this is a folder
|
|
|
29 |
"""Creates the different dataframes for the evaluation queues requestes"""
|
30 |
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
31 |
all_evals = []
|
|
|
32 |
for entry in entries:
|
33 |
if ".json" in entry:
|
34 |
file_path = os.path.join(save_path, entry)
|
35 |
with open(file_path) as fp:
|
36 |
data = json.load(fp)
|
|
|
37 |
data[EvalQueueColumn.model.name] = make_clickable_model(data["model_name"])
|
38 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
|
|
39 |
all_evals.append(data)
|
40 |
elif ".md" not in entry:
|
41 |
# this is a folder
|
src/submission/submit.py
CHANGED
@@ -42,16 +42,13 @@ PLACEHOLDER_DATASET_WISE_NORMALIZATION_CONFIG = """{
|
|
42 |
|
43 |
def add_new_eval(
|
44 |
model: str,
|
45 |
-
|
46 |
revision: str,
|
47 |
-
# precision: str,
|
48 |
-
# weight_type: str,
|
49 |
-
model_arch: str,
|
50 |
-
label_normalization_map: str,
|
51 |
-
gliner_threshold:str,
|
52 |
-
gliner_tokenizer_bool:str,
|
53 |
-
prompt_template_name:str,
|
54 |
model_type: str,
|
|
|
|
|
|
|
|
|
55 |
):
|
56 |
"""
|
57 |
Saves request if valid else returns the error.
|
@@ -85,22 +82,16 @@ def add_new_eval(
|
|
85 |
if revision == "":
|
86 |
revision = "main"
|
87 |
|
88 |
-
#
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
|
94 |
-
if not
|
95 |
model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
|
96 |
if not model_on_hub:
|
97 |
return styled_error(f'Model "{model}" {error}')
|
98 |
-
else:
|
99 |
-
model_name_matches = list(API.list_models(model_name=model))
|
100 |
-
if len(model_name_matches) < 1:
|
101 |
-
return styled_error(f'Model "{model}" does not exist on the hub!')
|
102 |
-
elif model_name_matches[0].id != model:
|
103 |
-
return styled_error(f'Model "{model}" does not exist on the hub! There might be a typo in the name')
|
104 |
|
105 |
|
106 |
# Is the model info correctly filled?
|
@@ -122,39 +113,15 @@ def add_new_eval(
|
|
122 |
return styled_error(error_msg)
|
123 |
|
124 |
# Verify the inference config now
|
125 |
-
try:
|
126 |
-
|
127 |
-
except Exception as e:
|
128 |
-
|
129 |
-
|
130 |
-
inference_config = {
|
131 |
-
# "model_arch" : model_arch,
|
132 |
-
"label_normalization_map": label_normalization_map,
|
133 |
-
}
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
if not prompt_template_name in [prompt_template.value for prompt_template in PromptTemplateName]:
|
140 |
-
return styled_error("Prompt template name is invalid")
|
141 |
-
inference_config = {
|
142 |
-
**inference_config,
|
143 |
-
"prompt_template_identifier": prompt_template_name,
|
144 |
-
}
|
145 |
-
case "GLiNER Encoder":
|
146 |
-
try:
|
147 |
-
gliner_threshold = float(gliner_threshold)
|
148 |
-
gliner_tokenizer_bool = ast.literal_eval(gliner_tokenizer_bool)
|
149 |
-
inference_config = {
|
150 |
-
**inference_config,
|
151 |
-
"gliner_threshold": gliner_threshold,
|
152 |
-
"gliner_tokenizer_bool" : gliner_tokenizer_bool
|
153 |
-
}
|
154 |
-
except Exception as e:
|
155 |
-
return styled_error("Please enter a valid float for the threshold")
|
156 |
-
case _:
|
157 |
-
return styled_error("Model Architecture is invalid")
|
158 |
|
159 |
# Seems good, creating the eval
|
160 |
print("Adding new eval")
|
@@ -162,11 +129,10 @@ def add_new_eval(
|
|
162 |
|
163 |
eval_entry = {
|
164 |
"model_name": model,
|
165 |
-
|
166 |
"revision": revision,
|
167 |
-
|
168 |
-
|
169 |
-
"model_architecture": model_arch,
|
170 |
"status": "PENDING",
|
171 |
"submitted_time": current_time,
|
172 |
"model_type": model_type,
|
@@ -174,18 +140,17 @@ def add_new_eval(
|
|
174 |
"num_params": model_size,
|
175 |
"license": license,
|
176 |
"private": False,
|
177 |
-
"inference_config":inference_config,
|
178 |
}
|
179 |
|
180 |
# Check for duplicate submission
|
181 |
|
182 |
-
if f"{model}_{revision}" in REQUESTED_MODELS:
|
183 |
return styled_warning("This model has been already submitted. Add the revision if the model has been updated.")
|
184 |
|
185 |
print("Creating eval file")
|
186 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
187 |
os.makedirs(OUT_DIR, exist_ok=True)
|
188 |
-
out_path = f"{OUT_DIR}/{model_path}_{revision}_eval_request.json"
|
189 |
|
190 |
with open(out_path, "w") as f:
|
191 |
f.write(json.dumps(eval_entry))
|
@@ -193,7 +158,7 @@ def add_new_eval(
|
|
193 |
print("Uploading eval file")
|
194 |
API.upload_file(
|
195 |
path_or_fileobj=out_path,
|
196 |
-
path_in_repo=out_path.split("
|
197 |
repo_id=QUEUE_REPO,
|
198 |
repo_type="dataset",
|
199 |
commit_message=f"Add {model} to eval queue",
|
|
|
42 |
|
43 |
def add_new_eval(
|
44 |
model: str,
|
45 |
+
base_model: str,
|
46 |
revision: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
model_type: str,
|
48 |
+
domain_specific: bool,
|
49 |
+
chat_template: bool,
|
50 |
+
precision: str,
|
51 |
+
weight_type: str,
|
52 |
):
|
53 |
"""
|
54 |
Saves request if valid else returns the error.
|
|
|
82 |
if revision == "":
|
83 |
revision = "main"
|
84 |
|
85 |
+
# Is the model on the hub?
|
86 |
+
if weight_type in ["Delta", "Adapter"]:
|
87 |
+
base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
|
88 |
+
if not base_model_on_hub:
|
89 |
+
return styled_error(f'Base model "{base_model}" {error}')
|
90 |
|
91 |
+
if not weight_type == "Adapter":
|
92 |
model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
|
93 |
if not model_on_hub:
|
94 |
return styled_error(f'Model "{model}" {error}')
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
|
97 |
# Is the model info correctly filled?
|
|
|
113 |
return styled_error(error_msg)
|
114 |
|
115 |
# Verify the inference config now
|
116 |
+
# try:
|
117 |
+
# label_normalization_map = ast.literal_eval(label_normalization_map)
|
118 |
+
# except Exception as e:
|
119 |
+
# return styled_error("Please enter a valid json for the labe; normalization map")
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
+
# inference_config = {
|
122 |
+
# # "model_arch" : model_arch,
|
123 |
+
# "label_normalization_map": label_normalization_map,
|
124 |
+
# }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
# Seems good, creating the eval
|
127 |
print("Adding new eval")
|
|
|
129 |
|
130 |
eval_entry = {
|
131 |
"model_name": model,
|
132 |
+
"base_model": base_model,
|
133 |
"revision": revision,
|
134 |
+
"precision": precision,
|
135 |
+
"weight_type": weight_type,
|
|
|
136 |
"status": "PENDING",
|
137 |
"submitted_time": current_time,
|
138 |
"model_type": model_type,
|
|
|
140 |
"num_params": model_size,
|
141 |
"license": license,
|
142 |
"private": False,
|
|
|
143 |
}
|
144 |
|
145 |
# Check for duplicate submission
|
146 |
|
147 |
+
if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
|
148 |
return styled_warning("This model has been already submitted. Add the revision if the model has been updated.")
|
149 |
|
150 |
print("Creating eval file")
|
151 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
152 |
os.makedirs(OUT_DIR, exist_ok=True)
|
153 |
+
out_path = f"{OUT_DIR}/{model_path}_{revision}_{precision}_{weight_type}_eval_request.json"
|
154 |
|
155 |
with open(out_path, "w") as f:
|
156 |
f.write(json.dumps(eval_entry))
|
|
|
158 |
print("Uploading eval file")
|
159 |
API.upload_file(
|
160 |
path_or_fileobj=out_path,
|
161 |
+
path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
|
162 |
repo_id=QUEUE_REPO,
|
163 |
repo_type="dataset",
|
164 |
commit_message=f"Add {model} to eval queue",
|