giskard-evaluator

Running

App Files Files Community

200

inoki-giskard committed on Dec 1, 2023

Commit

01c4e21

1 Parent(s): 583defc

Output label mapping in column mapping

Browse files

Files changed (1) hide show

app.py +56 -5

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import os
 import time
 from pathlib import Path
 import pandas as pd
 from transformers.pipelines import TextClassificationPipeline
@@ -84,15 +86,27 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
     return id2label_mapping
-def try_validate(model_id, dataset_id, dataset_config, dataset_split):
     # Validate model
     m_id, ppl = check_model(model_id=model_id)
     if m_id is None:
         gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
-        return dataset_config, dataset_split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load "{model_id} model": {ppl}')
-        return dataset_config, dataset_split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
     # Validate dataset
     d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
@@ -110,12 +124,23 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
         dataset_ok = True
     if not dataset_ok:
-        return config, split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
     # TODO: Validate column mapping by running once
     prediction_result = {}
     id2label_df = None
     if isinstance(ppl, TextClassificationPipeline):
         # Retrieve all labels
         id2label_mapping = {}
         try:
@@ -137,15 +162,27 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
                 "Model labels": [id2label[label] for label in id2label.keys()],
                 "Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
             })
         except AttributeError:
             # Dataset does not have features
             pass
     del ppl
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
-    return config, split, gr.update(interactive=True), gr.update(value=prediction_result, visible=True), gr.update(value=id2label_df, visible=True)
 def try_submit(m_id, d_id, config, split, local):
@@ -240,6 +277,18 @@ with gr.Blocks(theme=theme) as iface:
             id2label_mapping_dataframe = gr.DataFrame(visible=False)
     with gr.Row():
         validate_btn = gr.Button("Validate model and dataset", variant="primary")
         run_btn = gr.Button(
@@ -254,6 +303,7 @@ with gr.Blocks(theme=theme) as iface:
                 dataset_id_input,
                 dataset_config_input,
                 dataset_split_input,
             ],
             outputs=[
                 dataset_config_input,
@@ -261,6 +311,7 @@ with gr.Blocks(theme=theme) as iface:
                 run_btn,
                 example_labels,
                 id2label_mapping_dataframe,
             ],
         )
         run_btn.click(

 import time
 from pathlib import Path
+import json
 import pandas as pd
 from transformers.pipelines import TextClassificationPipeline
     return id2label_mapping
+def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_mapping):
     # Validate model
     m_id, ppl = check_model(model_id=model_id)
     if m_id is None:
         gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
+        return (
+            dataset_config, dataset_split,
+            gr.update(interactive=False),   # Submit button
+            gr.update(visible=False),       # Model prediction preview
+            gr.update(visible=False),       # Label mapping preview
+            gr.update(visible=True),        # Column mapping
+        )
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load "{model_id} model": {ppl}')
+        return (
+            dataset_config, dataset_split,
+            gr.update(interactive=False),   # Submit button
+            gr.update(visible=False),       # Model prediction preview
+            gr.update(visible=False),       # Label mapping preview
+            gr.update(visible=True),        # Column mapping
+        )
     # Validate dataset
     d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
         dataset_ok = True
     if not dataset_ok:
+        return (
+            config, split,
+            gr.update(interactive=False),   # Submit button
+            gr.update(visible=False),       # Model prediction preview
+            gr.update(visible=False),       # Label mapping preview
+            gr.update(visible=True),        # Column mapping
+        )
     # TODO: Validate column mapping by running once
     prediction_result = {}
     id2label_df = None
     if isinstance(ppl, TextClassificationPipeline):
+        try:
+            column_mapping = json.loads(column_mapping)
+        except Exception:
+            column_mapping = {}
         # Retrieve all labels
         id2label_mapping = {}
         try:
                 "Model labels": [id2label[label] for label in id2label.keys()],
                 "Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
             })
+            if "label" not in column_mapping.keys():
+                column_mapping["label"] = {
+                    i: id2label_mapping[id2label[i]] for i in id2label.keys()
+                }
         except AttributeError:
             # Dataset does not have features
             pass
+        column_mapping = json.dumps(column_mapping, indent=2)
     del ppl
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
+    return (
+        config, split,
+        gr.update(interactive=True),    # Submit button
+        gr.update(value=prediction_result, visible=True),   # Model prediction preview
+        gr.update(value=id2label_df, visible=True), # Label mapping preview
+        gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
+    )
 def try_submit(m_id, d_id, config, split, local):
             id2label_mapping_dataframe = gr.DataFrame(visible=False)
+    with gr.Row():
+        column_mapping_input = gr.Textbox(
+            value="",
+            lines=5,
+            label="Column mapping",
+            placeholder="Description of mapping of columns in model to dataset, in json format, e.g.:\n"
+                        '{\n'
+                        '   "text": "context",\n'
+                        '   "label": {0: "Positive", 1: "Negative"}\n'
+                        '}',
+        )
     with gr.Row():
         validate_btn = gr.Button("Validate model and dataset", variant="primary")
         run_btn = gr.Button(
                 dataset_id_input,
                 dataset_config_input,
                 dataset_split_input,
+                column_mapping_input,
             ],
             outputs=[
                 dataset_config_input,
                 run_btn,
                 example_labels,
                 id2label_mapping_dataframe,
+                column_mapping_input,
             ],
         )
         run_btn.click(