Spaces:
Running
Running
inoki-giskard
committed on
Commit
·
01c4e21
1
Parent(s):
583defc
Output label mapping in column mapping
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ import os
|
|
6 |
import time
|
7 |
from pathlib import Path
|
8 |
|
|
|
|
|
9 |
import pandas as pd
|
10 |
|
11 |
from transformers.pipelines import TextClassificationPipeline
|
@@ -84,15 +86,27 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
|
|
84 |
return id2label_mapping
|
85 |
|
86 |
|
87 |
-
def try_validate(model_id, dataset_id, dataset_config, dataset_split):
|
88 |
# Validate model
|
89 |
m_id, ppl = check_model(model_id=model_id)
|
90 |
if m_id is None:
|
91 |
gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
|
92 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
if isinstance(ppl, Exception):
|
94 |
gr.Warning(f'Failed to load "{model_id} model": {ppl}')
|
95 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
# Validate dataset
|
98 |
d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
|
@@ -110,12 +124,23 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
|
|
110 |
dataset_ok = True
|
111 |
|
112 |
if not dataset_ok:
|
113 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
# TODO: Validate column mapping by running once
|
116 |
prediction_result = {}
|
117 |
id2label_df = None
|
118 |
if isinstance(ppl, TextClassificationPipeline):
|
|
|
|
|
|
|
|
|
|
|
119 |
# Retrieve all labels
|
120 |
id2label_mapping = {}
|
121 |
try:
|
@@ -137,15 +162,27 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
|
|
137 |
"Model labels": [id2label[label] for label in id2label.keys()],
|
138 |
"Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
|
139 |
})
|
|
|
|
|
|
|
|
|
140 |
except AttributeError:
|
141 |
# Dataset does not have features
|
142 |
pass
|
143 |
|
|
|
|
|
144 |
del ppl
|
145 |
|
146 |
gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
|
147 |
|
148 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
|
151 |
def try_submit(m_id, d_id, config, split, local):
|
@@ -240,6 +277,18 @@ with gr.Blocks(theme=theme) as iface:
|
|
240 |
|
241 |
id2label_mapping_dataframe = gr.DataFrame(visible=False)
|
242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
with gr.Row():
|
244 |
validate_btn = gr.Button("Validate model and dataset", variant="primary")
|
245 |
run_btn = gr.Button(
|
@@ -254,6 +303,7 @@ with gr.Blocks(theme=theme) as iface:
|
|
254 |
dataset_id_input,
|
255 |
dataset_config_input,
|
256 |
dataset_split_input,
|
|
|
257 |
],
|
258 |
outputs=[
|
259 |
dataset_config_input,
|
@@ -261,6 +311,7 @@ with gr.Blocks(theme=theme) as iface:
|
|
261 |
run_btn,
|
262 |
example_labels,
|
263 |
id2label_mapping_dataframe,
|
|
|
264 |
],
|
265 |
)
|
266 |
run_btn.click(
|
|
|
6 |
import time
|
7 |
from pathlib import Path
|
8 |
|
9 |
+
import json
|
10 |
+
|
11 |
import pandas as pd
|
12 |
|
13 |
from transformers.pipelines import TextClassificationPipeline
|
|
|
86 |
return id2label_mapping
|
87 |
|
88 |
|
89 |
+
def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_mapping):
|
90 |
# Validate model
|
91 |
m_id, ppl = check_model(model_id=model_id)
|
92 |
if m_id is None:
|
93 |
gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
|
94 |
+
return (
|
95 |
+
dataset_config, dataset_split,
|
96 |
+
gr.update(interactive=False), # Submit button
|
97 |
+
gr.update(visible=False), # Model prediction preview
|
98 |
+
gr.update(visible=False), # Label mapping preview
|
99 |
+
gr.update(visible=True), # Column mapping
|
100 |
+
)
|
101 |
if isinstance(ppl, Exception):
|
102 |
gr.Warning(f'Failed to load "{model_id} model": {ppl}')
|
103 |
+
return (
|
104 |
+
dataset_config, dataset_split,
|
105 |
+
gr.update(interactive=False), # Submit button
|
106 |
+
gr.update(visible=False), # Model prediction preview
|
107 |
+
gr.update(visible=False), # Label mapping preview
|
108 |
+
gr.update(visible=True), # Column mapping
|
109 |
+
)
|
110 |
|
111 |
# Validate dataset
|
112 |
d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
|
|
|
124 |
dataset_ok = True
|
125 |
|
126 |
if not dataset_ok:
|
127 |
+
return (
|
128 |
+
config, split,
|
129 |
+
gr.update(interactive=False), # Submit button
|
130 |
+
gr.update(visible=False), # Model prediction preview
|
131 |
+
gr.update(visible=False), # Label mapping preview
|
132 |
+
gr.update(visible=True), # Column mapping
|
133 |
+
)
|
134 |
|
135 |
# TODO: Validate column mapping by running once
|
136 |
prediction_result = {}
|
137 |
id2label_df = None
|
138 |
if isinstance(ppl, TextClassificationPipeline):
|
139 |
+
try:
|
140 |
+
column_mapping = json.loads(column_mapping)
|
141 |
+
except Exception:
|
142 |
+
column_mapping = {}
|
143 |
+
|
144 |
# Retrieve all labels
|
145 |
id2label_mapping = {}
|
146 |
try:
|
|
|
162 |
"Model labels": [id2label[label] for label in id2label.keys()],
|
163 |
"Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
|
164 |
})
|
165 |
+
if "label" not in column_mapping.keys():
|
166 |
+
column_mapping["label"] = {
|
167 |
+
i: id2label_mapping[id2label[i]] for i in id2label.keys()
|
168 |
+
}
|
169 |
except AttributeError:
|
170 |
# Dataset does not have features
|
171 |
pass
|
172 |
|
173 |
+
column_mapping = json.dumps(column_mapping, indent=2)
|
174 |
+
|
175 |
del ppl
|
176 |
|
177 |
gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
|
178 |
|
179 |
+
return (
|
180 |
+
config, split,
|
181 |
+
gr.update(interactive=True), # Submit button
|
182 |
+
gr.update(value=prediction_result, visible=True), # Model prediction preview
|
183 |
+
gr.update(value=id2label_df, visible=True), # Label mapping preview
|
184 |
+
gr.update(value=column_mapping, visible=True, interactive=True), # Column mapping
|
185 |
+
)
|
186 |
|
187 |
|
188 |
def try_submit(m_id, d_id, config, split, local):
|
|
|
277 |
|
278 |
id2label_mapping_dataframe = gr.DataFrame(visible=False)
|
279 |
|
280 |
+
with gr.Row():
|
281 |
+
column_mapping_input = gr.Textbox(
|
282 |
+
value="",
|
283 |
+
lines=5,
|
284 |
+
label="Column mapping",
|
285 |
+
placeholder="Description of mapping of columns in model to dataset, in json format, e.g.:\n"
|
286 |
+
'{\n'
|
287 |
+
' "text": "context",\n'
|
288 |
+
' "label": {0: "Positive", 1: "Negative"}\n'
|
289 |
+
'}',
|
290 |
+
)
|
291 |
+
|
292 |
with gr.Row():
|
293 |
validate_btn = gr.Button("Validate model and dataset", variant="primary")
|
294 |
run_btn = gr.Button(
|
|
|
303 |
dataset_id_input,
|
304 |
dataset_config_input,
|
305 |
dataset_split_input,
|
306 |
+
column_mapping_input,
|
307 |
],
|
308 |
outputs=[
|
309 |
dataset_config_input,
|
|
|
311 |
run_btn,
|
312 |
example_labels,
|
313 |
id2label_mapping_dataframe,
|
314 |
+
column_mapping_input,
|
315 |
],
|
316 |
)
|
317 |
run_btn.click(
|