giskard-evaluator

Sleeping

App Files Files Community

200

inoki-giskard

ZeroCommand committed on Dec 29, 2023

Commit

be473e6

1 Parent(s): 9e4233f

GSK-2352 create a leaderboard tab (#15)

Browse files

- add leaderboard ui and refactor code (cbb886ad64e513b28407c51bf6f505899f61264d)
- fix bugs and add logs for leaderboard tab (80ed307fea11eb9acb2fff7e6d96ab2fba7aa55c)
- fix cicd submodule (f0a313e7fe8c116d0fee6c8d224373e9057f9db3)
- hide dropdown menus when the labels match (ba41a5cd5b15c2062ecc33dc66f9109cd3f88148)
- hide dropdown menus when the labels match when labels not matching (5058ff34b4c55153b293ac6f00c1e9c50b9ca7c9)

Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>

Files changed (8) hide show

app.py +6 -4
app_leaderboard.py +98 -0
app_legacy.py +1 -1
app_text_classification.py +64 -26
cicd +1 -0
fetch_utils.py +26 -0
utils.py → io_utils.py +9 -10
wordings.py +8 -3

app.py CHANGED Viewed

@@ -5,11 +5,13 @@
 import gradio as gr
 from app_text_classification import get_demo as get_demo_text_classification
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
     with gr.Tab("Text Classification"):
         get_demo_text_classification()
-    with gr.Tab("Leaderboard - Text Classification"):
-        pass
-demo.launch()

 import gradio as gr
 from app_text_classification import get_demo as get_demo_text_classification
+from app_leaderboard import get_demo as get_demo_leaderboard
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
     with gr.Tab("Text Classification"):
         get_demo_text_classification()
+    with gr.Tab("Leaderboard"):
+        get_demo_leaderboard()
+demo.queue(max_size=100)
+demo.launch(share=False)

app_leaderboard.py CHANGED Viewed

	@@ -0,0 +1,98 @@

+import gradio as gr
+import datasets
+import logging
+from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split
+def get_records_from_dataset_repo(dataset_id):
+    dataset_config = check_dataset_and_get_config(dataset_id)
+    logging.info(f"Dataset {dataset_id} has configs {dataset_config}")
+    dataset_split = check_dataset_and_get_split(dataset_id, dataset_config[0])
+    logging.info(f"Dataset {dataset_id} has splits {dataset_split}")
+    try:
+        ds = datasets.load_dataset(dataset_id, dataset_config[0])[dataset_split[0]]
+        df = ds.to_pandas()
+        return df
+    except Exception as e:
+        logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+        return None
+def get_model_ids(ds):
+    logging.info(f"Dataset {ds} column names: {ds['model_id']}")
+    models = ds['model_id'].tolist()
+    # return unique elements in the list model_ids
+    model_ids = list(set(models))
+    return model_ids
+def get_dataset_ids(ds):
+    logging.info(f"Dataset {ds} column names: {ds['dataset_id']}")
+    datasets = ds['dataset_id'].tolist()
+    dataset_ids = list(set(datasets))
+    return dataset_ids
+def get_types(ds):
+    # set all types for each column
+    types = [str(t) for t in ds.dtypes.to_list()]
+    types = [t.replace('object', 'markdown') for t in types]
+    types = [t.replace('float64', 'number') for t in types]
+    types = [t.replace('int64', 'number') for t in types]
+    return types
+def get_display_df(df):
+    # style all elements in the model_id column
+    display_df = df.copy()
+    columns = display_df.columns.tolist()
+    if 'model_id' in columns:
+        display_df['model_id'] = display_df['model_id'].apply(lambda x: f'<p href="https://huggingface.co/{x}" style="color:blue">🔗{x}</p>')
+    # style all elements in the dataset_id column
+    if 'dataset_id' in columns:
+        display_df['dataset_id'] = display_df['dataset_id'].apply(lambda x: f'<p href="https://huggingface.co/datasets/{x}" style="color:blue">🔗{x}</p>')
+    # style all elements in the report_link column
+    if 'report_link' in columns:
+        display_df['report_link'] = display_df['report_link'].apply(lambda x: f'<p href="{x}" style="color:blue">🔗{x}</p>')
+    return display_df
+def get_demo():
+    records = get_records_from_dataset_repo('ZeroCommand/test-giskard-report')
+    model_ids = get_model_ids(records)
+    dataset_ids = get_dataset_ids(records)
+    column_names = records.columns.tolist()
+    default_columns = ['model_id', 'dataset_id', 'total_issues', 'report_link']
+    # set the default columns to show
+    default_df = records[default_columns]
+    types = get_types(default_df)
+    display_df = get_display_df(default_df)
+    with gr.Row():
+        task_select = gr.Dropdown(label='Task', choices=['text_classification', 'tabular'], value='text_classification', interactive=True)
+        model_select = gr.Dropdown(label='Model id', choices=model_ids, interactive=True)
+        dataset_select = gr.Dropdown(label='Dataset id', choices=dataset_ids, interactive=True)
+    with gr.Row():
+        columns_select = gr.CheckboxGroup(label='Show columns', choices=column_names, value=default_columns, interactive=True)
+    with gr.Row():
+        leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
+    @gr.on(triggers=[model_select.change, dataset_select.change, columns_select.change, task_select.change],
+           inputs=[model_select, dataset_select, columns_select, task_select],
+           outputs=[leaderboard_df])
+    def filter_table(model_id, dataset_id, columns, task):
+        # filter the table based on task
+        df = records[(records['task'] == task)]
+        # filter the table based on the model_id and dataset_id
+        if model_id:
+            df = records[(records['model_id'] == model_id)]
+        if dataset_id:
+            df = records[(records['dataset_id'] == dataset_id)]
+        # filter the table based on the columns
+        df = df[columns]
+        types = get_types(df)
+        display_df = get_display_df(df)
+        return (
+            gr.update(value=display_df, datatype=types, interactive=False)
+        )

app_legacy.py CHANGED Viewed

@@ -11,7 +11,7 @@ import json
 from transformers.pipelines import TextClassificationPipeline
 from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
-from utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
 from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD
 HF_REPO_ID = 'HF_REPO_ID'

 from transformers.pipelines import TextClassificationPipeline
 from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
+from io_utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
 from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD
 HF_REPO_ID = 'HF_REPO_ID'

app_text_classification.py CHANGED Viewed

@@ -4,14 +4,15 @@ import os
 import time
 import subprocess
 import logging
 import json
 from transformers.pipelines import TextClassificationPipeline
-from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction, check_column_mapping_keys_validity, text_classification_fix_column_mapping
-from utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type, convert_column_mapping_to_json
-from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
 HF_REPO_ID = 'HF_REPO_ID'
 HF_SPACE_ID = 'SPACE_ID'
@@ -95,7 +96,7 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
 def get_demo():
     with gr.Row():
-        gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
     with gr.Row():
         model_id_input = gr.Textbox(
             label="Hugging Face model id",
@@ -117,13 +118,17 @@ def get_demo():
         example_prediction = gr.Label(label='Model Prediction Sample', visible=False)
     with gr.Row():
-        column_mappings = []
-        with gr.Column():
-            for _ in range(MAX_LABELS):
-                column_mappings.append(gr.Dropdown(visible=False))
-        with gr.Column():
-            for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
-                column_mappings.append(gr.Dropdown(visible=False))
     with gr.Accordion(label='Model Wrap Advance Config (optional)', open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
@@ -165,14 +170,12 @@ def get_demo():
                 all_mappings["features"][feat] = ds_features[i]
         write_column_mapping(all_mappings)
-    def list_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split, model_id2label, model_features):
-        ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
-        if ds_labels is None or ds_features is None:
-            return [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
         model_labels = list(model_id2label.values())
         lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
         lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
-        features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in model_features]
         features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
         return lables + features
@@ -182,7 +185,7 @@ def get_demo():
     @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-        outputs=[example_input, example_prediction, *column_mappings])
     def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
         ppl = check_model(model_id)
         if ppl is None or not isinstance(ppl, TextClassificationPipeline):
@@ -192,27 +195,52 @@ def get_demo():
                 gr.update(visible=False),
                 *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
             )
         model_id2label = ppl.model.config.id2label
-        model_features = ['text']
         column_mappings = list_labels_and_features_from_dataset(
-            dataset_id,
-            dataset_config,
-            dataset_split,
-            model_id2label,
-            model_features
         )
-        if ppl is None:
-            gr.Warning("Model not found")
             return (
                 gr.update(visible=False),
                 gr.update(visible=False),
                 *column_mappings
             )
         prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
         return (
             gr.update(value=prediction_input, visible=True),
             gr.update(value=prediction_output, visible=True),
             *column_mappings
         )
@@ -222,7 +250,17 @@ def get_demo():
         check_dataset_and_get_split,
         inputs=[dataset_id_input, dataset_config_input],
         outputs=[dataset_split_input])
     gr.on(
         triggers=[
             run_btn.click,

 import time
 import subprocess
 import logging
+import collections
 import json
 from transformers.pipelines import TextClassificationPipeline
+from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
+from io_utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type
+from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
 HF_REPO_ID = 'HF_REPO_ID'
 HF_SPACE_ID = 'SPACE_ID'
 def get_demo():
     with gr.Row():
+        gr.Markdown(INTRODUCTION_MD)
     with gr.Row():
         model_id_input = gr.Textbox(
             label="Hugging Face model id",
         example_prediction = gr.Label(label='Model Prediction Sample', visible=False)
     with gr.Row():
+        with gr.Accordion(label='Label and Feature Mapping', visible=False, open=False) as column_mapping_accordion:
+            with gr.Row():
+                gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
+            column_mappings = []
+            with gr.Row():
+                with gr.Column():
+                    for _ in range(MAX_LABELS):
+                        column_mappings.append(gr.Dropdown(visible=False))
+                with gr.Column():
+                    for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
+                        column_mappings.append(gr.Dropdown(visible=False))
     with gr.Accordion(label='Model Wrap Advance Config (optional)', open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
                 all_mappings["features"][feat] = ds_features[i]
         write_column_mapping(all_mappings)
+    def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
         model_labels = list(model_id2label.values())
         lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
         lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
+        # TODO: Substitute 'text' with more features for zero-shot
+        features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in ['text']]
         features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
         return lables + features
     @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+        outputs=[example_input, example_prediction, column_mapping_accordion, *column_mappings])
     def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
         ppl = check_model(model_id)
         if ppl is None or not isinstance(ppl, TextClassificationPipeline):
                 gr.update(visible=False),
                 *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
             )
+        dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+        if ppl is None: # pipeline not found
+            gr.Warning("Model not found")
+            return (
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False, open=False),
+                *dropdown_placement
+            )
         model_id2label = ppl.model.config.id2label
+        ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+        # when dataset does not have labels or features
+        if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
+            gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+            return (
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False, open=False),
+                *dropdown_placement
+            )
         column_mappings = list_labels_and_features_from_dataset(
+            ds_labels,
+            ds_features,
+            model_id2label,
         )
+        # when labels or features are not aligned
+        # show manually column mapping
+        if collections.Counter(model_id2label.items()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
+            gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
             return (
                 gr.update(visible=False),
                 gr.update(visible=False),
+                gr.update(visible=True, open=True),
                 *column_mappings
             )
         prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
         return (
             gr.update(value=prediction_input, visible=True),
             gr.update(value=prediction_output, visible=True),
+            gr.update(visible=True, open=False),
             *column_mappings
         )
         check_dataset_and_get_split,
         inputs=[dataset_id_input, dataset_config_input],
         outputs=[dataset_split_input])
+    scanners.change(
+        write_scanners,
+        inputs=scanners
+    )
+    run_inference.change(
+        write_inference_type,
+        inputs=[run_inference]
+    )
     gr.on(
         triggers=[
             run_btn.click,

cicd ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 24d96209fb943568e001d582999345e2c58e0876

fetch_utils.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import huggingface_hub
+import datasets
+import logging
+def check_dataset_and_get_config(dataset_id):
+    try:
+        configs = datasets.get_dataset_config_names(dataset_id)
+        return configs
+    except Exception:
+        # Dataset may not exist
+        return None
+def check_dataset_and_get_split(dataset_id, dataset_config):
+    try:
+        ds = datasets.load_dataset(dataset_id, dataset_config)
+    except Exception as e:
+        # Dataset may not exist
+        logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+        return None
+    try:
+        splits = list(ds.keys())
+        return splits
+    except Exception as e:
+        # Dataset has no splits
+        logging.warning(f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}")
+        return None

utils.py → io_utils.py RENAMED Viewed

@@ -17,13 +17,13 @@ def read_scanners(path):
 # convert a list of scanners to yaml file
 def write_scanners(scanners):
-    with open(YAML_PATH, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
-    config["detectors"] = scanners
-    with open(YAML_PATH, "w") as f:
-        # save scanners to detectors in yaml
-        yaml.dump(config, f, Dumper=Dumper)
 # read model_type from yaml file
 def read_inference_type(path):
@@ -35,15 +35,14 @@ def read_inference_type(path):
 # write model_type to yaml file
 def write_inference_type(use_inference):
-    with open(YAML_PATH, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
     if use_inference:
         config["inference_type"] = 'hf_inference_api'
     else:
         config["inference_type"] = 'hf_pipeline'
-    with open(YAML_PATH, "w") as f:
-        # save inference_type to inference_type in yaml
-        yaml.dump(config, f, Dumper=Dumper)
 # read column mapping from yaml file
 def read_column_mapping(path):

 # convert a list of scanners to yaml file
 def write_scanners(scanners):
+    print(scanners)
+    with open(YAML_PATH, "r+") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+        if config:
+            config["detectors"] = scanners
+            # save scanners to detectors in yaml
+            yaml.dump(config, f, Dumper=Dumper)
 # read model_type from yaml file
 def read_inference_type(path):
 # write model_type to yaml file
 def write_inference_type(use_inference):
+    with open(YAML_PATH, "r+") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
     if use_inference:
         config["inference_type"] = 'hf_inference_api'
     else:
         config["inference_type"] = 'hf_pipeline'
+    # save inference_type to inference_type in yaml
+    yaml.dump(config, f, Dumper=Dumper)
 # read column mapping from yaml file
 def read_column_mapping(path):

wordings.py CHANGED Viewed

@@ -1,10 +1,15 @@
-CONFIRM_MAPPING_DETAILS_MD = '''
                 <h1 style="text-align: center;">
-                Giskard Evaluator
                 </h1>
                 Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
                 '''
 CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details

+INTRODUCTION_MD = '''
                 <h1 style="text-align: center;">
+                🐢Giskard Evaluator
                 </h1>
                 Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
                 '''
+CONFIRM_MAPPING_DETAILS_MD = '''
+                            <h1 style="text-align: center;">
+                            Confirm Pre-processing Details
+                            </h1>
+                            Please confirm the pre-processing details below. If you are not sure, please double check your model and dataset.
+                            '''
 CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details