inoki-giskard ZeroCommand committed on
Commit
be473e6
·
1 Parent(s): 9e4233f

GSK-2352 create a leaderboard tab (#15)

Browse files

- add leaderboard ui and refactor code (cbb886ad64e513b28407c51bf6f505899f61264d)
- fix bugs and add logs for leaderboard tab (80ed307fea11eb9acb2fff7e6d96ab2fba7aa55c)
- fix cicd submodule (f0a313e7fe8c116d0fee6c8d224373e9057f9db3)
- hide dropdown menus when the labels match (ba41a5cd5b15c2062ecc33dc66f9109cd3f88148)
- hide dropdown menus when the labels match when labels not matching (5058ff34b4c55153b293ac6f00c1e9c50b9ca7c9)


Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>

app.py CHANGED
@@ -5,11 +5,13 @@
5
  import gradio as gr
6
 
7
  from app_text_classification import get_demo as get_demo_text_classification
8
-
9
 
10
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
11
  with gr.Tab("Text Classification"):
12
  get_demo_text_classification()
13
- with gr.Tab("Leaderboard - Text Classification"):
14
- pass
15
- demo.launch()
 
 
 
5
  import gradio as gr
6
 
7
  from app_text_classification import get_demo as get_demo_text_classification
8
+ from app_leaderboard import get_demo as get_demo_leaderboard
9
 
10
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
11
  with gr.Tab("Text Classification"):
12
  get_demo_text_classification()
13
+ with gr.Tab("Leaderboard"):
14
+ get_demo_leaderboard()
15
+
16
+ demo.queue(max_size=100)
17
+ demo.launch(share=False)
app_leaderboard.py CHANGED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import datasets
3
+ import logging
4
+ from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split
5
+
6
def get_records_from_dataset_repo(dataset_id):
    """Load the records stored in a Hugging Face dataset repo as a DataFrame.

    The first config and the first split reported for the repo are used.

    Args:
        dataset_id: Hub id of the dataset repo to read.

    Returns:
        The selected split converted with ``to_pandas()``, or ``None`` when
        no config/split can be resolved or the dataset fails to load.
    """
    dataset_config = check_dataset_and_get_config(dataset_id)
    logging.info(f"Dataset {dataset_id} has configs {dataset_config}")
    # The lookup yields None on failure; indexing it would raise TypeError
    # before the try block below is ever reached.
    if not dataset_config:
        logging.warning(f"Dataset {dataset_id} has no usable config")
        return None

    dataset_split = check_dataset_and_get_split(dataset_id, dataset_config[0])
    logging.info(f"Dataset {dataset_id} has splits {dataset_split}")
    if not dataset_split:
        logging.warning(f"Dataset {dataset_id} with config {dataset_config[0]} has no usable split")
        return None

    try:
        ds = datasets.load_dataset(dataset_id, dataset_config[0])[dataset_split[0]]
        return ds.to_pandas()
    except Exception as e:
        logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
        return None
20
+
21
def get_model_ids(ds):
    """Return the distinct values of the ``model_id`` column.

    Args:
        ds: DataFrame of records that has a ``model_id`` column.

    Returns:
        List of unique model ids in order of first appearance, with missing
        values dropped so they never show up as dropdown choices.
    """
    # unique() keeps first-appearance order, unlike list(set(...)) whose
    # order can change between runs.
    model_ids = ds['model_id'].dropna().unique().tolist()
    logging.info(f"Found {len(model_ids)} unique model ids: {model_ids}")
    return model_ids
27
+
28
def get_dataset_ids(ds):
    """Return the distinct values of the ``dataset_id`` column.

    Args:
        ds: DataFrame of records that has a ``dataset_id`` column.

    Returns:
        List of unique dataset ids in order of first appearance, with missing
        values dropped so they never show up as dropdown choices.
    """
    # The result is deliberately not bound to a local called `datasets`,
    # which would shadow the imported `datasets` module inside this function.
    dataset_ids = ds['dataset_id'].dropna().unique().tolist()
    logging.info(f"Found {len(dataset_ids)} unique dataset ids: {dataset_ids}")
    return dataset_ids
33
+
34
def get_types(ds):
    """Map every column dtype of a DataFrame to a Gradio Dataframe datatype.

    Args:
        ds: DataFrame whose columns are about to be displayed.

    Returns:
        One datatype string per column, in column order: ``'number'`` for
        integer and float columns of any width, ``'bool'`` for booleans,
        ``'date'`` for datetimes and ``'markdown'`` for everything else
        (including ``object`` columns, which carry the HTML link cells).
    """
    # Dispatch on the dtype kind code rather than the dtype name, so int32,
    # float32, unsigned and nullable integer columns are covered as well as
    # the int64/float64 cases.
    kind_to_type = {
        'i': 'number',
        'u': 'number',
        'f': 'number',
        'b': 'bool',
        'M': 'date',
    }
    return [
        kind_to_type.get(getattr(dtype, 'kind', 'O'), 'markdown')
        for dtype in ds.dtypes.to_list()
    ]
41
+
42
def get_display_df(df):
    """Return a copy of ``df`` whose id/link columns are rendered as HTML links.

    ``model_id`` and ``dataset_id`` cells link to their Hugging Face pages and
    ``report_link`` cells link to themselves. Columns that are absent are
    skipped, and the input frame is left untouched.

    Args:
        df: DataFrame of records to prepare for display.

    Returns:
        A new DataFrame with the link columns replaced by ``<a>`` markup.
    """
    import html

    def _as_link(url_prefix):
        # Anchor tags are required for a clickable link: `href` has no effect
        # on a <p> element. Cell values come from a dataset repo, so they are
        # escaped before being placed into markup.
        def render(value):
            text = html.escape(str(value), quote=True)
            return f'<a href="{html.escape(url_prefix, quote=True)}{text}" style="color:blue">🔗{text}</a>'
        return render

    url_prefixes = {
        'model_id': 'https://huggingface.co/',
        'dataset_id': 'https://huggingface.co/datasets/',
        'report_link': '',
    }

    display_df = df.copy()
    for column, url_prefix in url_prefixes.items():
        if column in display_df.columns:
            display_df[column] = display_df[column].apply(_as_link(url_prefix))
    return display_df
55
+
56
def get_demo():
    """Build the leaderboard UI: filter dropdowns, column picker and table.

    Creates Gradio components, so it has to be called while a Blocks/Tab
    context is open. Records are read once from the
    'ZeroCommand/test-giskard-report' dataset repo; the table is re-rendered
    whenever one of the selectors changes.
    """
    records = get_records_from_dataset_repo('ZeroCommand/test-giskard-report')
    if records is None:
        # The loader returns None when the dataset cannot be loaded; show a
        # notice instead of raising while the UI is being built.
        gr.Markdown('Leaderboard records are currently unavailable.')
        return

    model_ids = get_model_ids(records)
    dataset_ids = get_dataset_ids(records)

    column_names = records.columns.tolist()
    # Default to the preferred columns, limited to those the records contain.
    default_columns = [
        name
        for name in ['model_id', 'dataset_id', 'total_issues', 'report_link']
        if name in column_names
    ]
    default_df = records[default_columns]
    types = get_types(default_df)
    display_df = get_display_df(default_df)

    with gr.Row():
        task_select = gr.Dropdown(label='Task', choices=['text_classification', 'tabular'], value='text_classification', interactive=True)
        model_select = gr.Dropdown(label='Model id', choices=model_ids, interactive=True)
        dataset_select = gr.Dropdown(label='Dataset id', choices=dataset_ids, interactive=True)

    with gr.Row():
        columns_select = gr.CheckboxGroup(label='Show columns', choices=column_names, value=default_columns, interactive=True)

    with gr.Row():
        leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)

    @gr.on(triggers=[model_select.change, dataset_select.change, columns_select.change, task_select.change],
           inputs=[model_select, dataset_select, columns_select, task_select],
           outputs=[leaderboard_df])
    def filter_table(model_id, dataset_id, columns, task):
        """Apply the task, model and dataset filters, then keep the chosen columns."""
        # Each filter narrows the previous result. Filtering `records` again
        # at every step would throw away the filters applied before it.
        df = records[records['task'] == task]
        if model_id:
            df = df[df['model_id'] == model_id]
        if dataset_id:
            df = df[df['dataset_id'] == dataset_id]

        # Keep only the columns ticked in the checkbox group.
        df = df[columns]
        return gr.update(
            value=get_display_df(df),
            datatype=get_types(df),
            interactive=False,
        )
app_legacy.py CHANGED
@@ -11,7 +11,7 @@ import json
11
  from transformers.pipelines import TextClassificationPipeline
12
 
13
  from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
14
- from utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
15
  from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD
16
 
17
  HF_REPO_ID = 'HF_REPO_ID'
 
11
  from transformers.pipelines import TextClassificationPipeline
12
 
13
  from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
14
+ from io_utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
15
  from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD
16
 
17
  HF_REPO_ID = 'HF_REPO_ID'
app_text_classification.py CHANGED
@@ -4,14 +4,15 @@ import os
4
  import time
5
  import subprocess
6
  import logging
 
7
 
8
  import json
9
 
10
  from transformers.pipelines import TextClassificationPipeline
11
 
12
- from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction, check_column_mapping_keys_validity, text_classification_fix_column_mapping
13
- from utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type, convert_column_mapping_to_json
14
- from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
15
 
16
  HF_REPO_ID = 'HF_REPO_ID'
17
  HF_SPACE_ID = 'SPACE_ID'
@@ -95,7 +96,7 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
95
 
96
  def get_demo():
97
  with gr.Row():
98
- gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
99
  with gr.Row():
100
  model_id_input = gr.Textbox(
101
  label="Hugging Face model id",
@@ -117,13 +118,17 @@ def get_demo():
117
  example_prediction = gr.Label(label='Model Prediction Sample', visible=False)
118
 
119
  with gr.Row():
120
- column_mappings = []
121
- with gr.Column():
122
- for _ in range(MAX_LABELS):
123
- column_mappings.append(gr.Dropdown(visible=False))
124
- with gr.Column():
125
- for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
126
- column_mappings.append(gr.Dropdown(visible=False))
 
 
 
 
127
 
128
  with gr.Accordion(label='Model Wrap Advance Config (optional)', open=False):
129
  run_local = gr.Checkbox(value=True, label="Run in this Space")
@@ -165,14 +170,12 @@ def get_demo():
165
  all_mappings["features"][feat] = ds_features[i]
166
  write_column_mapping(all_mappings)
167
 
168
- def list_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split, model_id2label, model_features):
169
- ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
170
- if ds_labels is None or ds_features is None:
171
- return [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
172
  model_labels = list(model_id2label.values())
173
  lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
174
  lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
175
- features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in model_features]
 
176
  features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
177
  return lables + features
178
 
@@ -182,7 +185,7 @@ def get_demo():
182
 
183
  @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
184
  inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
185
- outputs=[example_input, example_prediction, *column_mappings])
186
  def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
187
  ppl = check_model(model_id)
188
  if ppl is None or not isinstance(ppl, TextClassificationPipeline):
@@ -192,27 +195,52 @@ def get_demo():
192
  gr.update(visible=False),
193
  *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
194
  )
 
 
 
 
 
 
 
 
 
 
 
195
  model_id2label = ppl.model.config.id2label
196
- model_features = ['text']
 
 
 
 
 
 
 
 
 
 
 
197
  column_mappings = list_labels_and_features_from_dataset(
198
- dataset_id,
199
- dataset_config,
200
- dataset_split,
201
- model_id2label,
202
- model_features
203
  )
204
 
205
- if ppl is None:
206
- gr.Warning("Model not found")
 
 
207
  return (
208
  gr.update(visible=False),
209
  gr.update(visible=False),
 
210
  *column_mappings
211
  )
 
212
  prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
213
  return (
214
  gr.update(value=prediction_input, visible=True),
215
  gr.update(value=prediction_output, visible=True),
 
216
  *column_mappings
217
  )
218
 
@@ -222,7 +250,17 @@ def get_demo():
222
  check_dataset_and_get_split,
223
  inputs=[dataset_id_input, dataset_config_input],
224
  outputs=[dataset_split_input])
225
-
 
 
 
 
 
 
 
 
 
 
226
  gr.on(
227
  triggers=[
228
  run_btn.click,
 
4
  import time
5
  import subprocess
6
  import logging
7
+ import collections
8
 
9
  import json
10
 
11
  from transformers.pipelines import TextClassificationPipeline
12
 
13
+ from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
14
+ from io_utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type
15
+ from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
16
 
17
  HF_REPO_ID = 'HF_REPO_ID'
18
  HF_SPACE_ID = 'SPACE_ID'
 
96
 
97
  def get_demo():
98
  with gr.Row():
99
+ gr.Markdown(INTRODUCTION_MD)
100
  with gr.Row():
101
  model_id_input = gr.Textbox(
102
  label="Hugging Face model id",
 
118
  example_prediction = gr.Label(label='Model Prediction Sample', visible=False)
119
 
120
  with gr.Row():
121
+ with gr.Accordion(label='Label and Feature Mapping', visible=False, open=False) as column_mapping_accordion:
122
+ with gr.Row():
123
+ gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
124
+ column_mappings = []
125
+ with gr.Row():
126
+ with gr.Column():
127
+ for _ in range(MAX_LABELS):
128
+ column_mappings.append(gr.Dropdown(visible=False))
129
+ with gr.Column():
130
+ for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
131
+ column_mappings.append(gr.Dropdown(visible=False))
132
 
133
  with gr.Accordion(label='Model Wrap Advance Config (optional)', open=False):
134
  run_local = gr.Checkbox(value=True, label="Run in this Space")
 
170
  all_mappings["features"][feat] = ds_features[i]
171
  write_column_mapping(all_mappings)
172
 
173
def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
    """Build the dropdowns for the label and feature mapping.

    Args:
        ds_labels: Label names found in the dataset.
        ds_features: Feature (column) names found in the dataset.
        model_id2label: Mapping from model label id to model label name.

    Returns:
        A list of MAX_LABELS + MAX_FEATURES ``gr.Dropdown`` objects: visible
        ones for the mapped labels/features, padded with hidden ones.
    """
    model_labels = list(model_id2label.values())
    labels = [
        gr.Dropdown(
            label=f"{label}",
            choices=model_labels,
            # .get(): the dataset may list more labels than the model has
            # ids for; leave such a dropdown without a preselected value
            # instead of raising KeyError.
            value=model_id2label.get(i),
            interactive=True,
            visible=True,
        )
        for i, label in enumerate(ds_labels[:MAX_LABELS])
    ]
    labels += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(labels))]

    # TODO: Substitute 'text' with more features for zero-shot
    # Guard the default: an empty feature list would raise IndexError.
    default_feature = ds_features[0] if ds_features else None
    features = [
        gr.Dropdown(label=f"{feature}", choices=ds_features, value=default_feature, interactive=True, visible=True)
        for feature in ['text']
    ]
    features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
    return labels + features
181
 
 
185
 
186
  @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
187
  inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
188
+ outputs=[example_input, example_prediction, column_mapping_accordion, *column_mappings])
189
  def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
190
  ppl = check_model(model_id)
191
  if ppl is None or not isinstance(ppl, TextClassificationPipeline):
 
195
  gr.update(visible=False),
196
  *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
197
  )
198
+
199
+ dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
200
+
201
+ if ppl is None: # pipeline not found
202
+ gr.Warning("Model not found")
203
+ return (
204
+ gr.update(visible=False),
205
+ gr.update(visible=False),
206
+ gr.update(visible=False, open=False),
207
+ *dropdown_placement
208
+ )
209
  model_id2label = ppl.model.config.id2label
210
+ ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
211
+
212
+ # when dataset does not have labels or features
213
+ if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
214
+ gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
215
+ return (
216
+ gr.update(visible=False),
217
+ gr.update(visible=False),
218
+ gr.update(visible=False, open=False),
219
+ *dropdown_placement
220
+ )
221
+
222
  column_mappings = list_labels_and_features_from_dataset(
223
+ ds_labels,
224
+ ds_features,
225
+ model_id2label,
 
 
226
  )
227
 
228
+ # when labels or features are not aligned
229
+ # show manually column mapping
230
+ if collections.Counter(model_id2label.items()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
231
+ gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
232
  return (
233
  gr.update(visible=False),
234
  gr.update(visible=False),
235
+ gr.update(visible=True, open=True),
236
  *column_mappings
237
  )
238
+
239
  prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
240
  return (
241
  gr.update(value=prediction_input, visible=True),
242
  gr.update(value=prediction_output, visible=True),
243
+ gr.update(visible=True, open=False),
244
  *column_mappings
245
  )
246
 
 
250
  check_dataset_and_get_split,
251
  inputs=[dataset_id_input, dataset_config_input],
252
  outputs=[dataset_split_input])
253
+
254
+ scanners.change(
255
+ write_scanners,
256
+ inputs=scanners
257
+ )
258
+
259
+ run_inference.change(
260
+ write_inference_type,
261
+ inputs=[run_inference]
262
+ )
263
+
264
  gr.on(
265
  triggers=[
266
  run_btn.click,
cicd ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 24d96209fb943568e001d582999345e2c58e0876
fetch_utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import huggingface_hub
2
+ import datasets
3
+ import logging
4
+
5
def check_dataset_and_get_config(dataset_id):
    """Return the config names of a dataset, or ``None`` if the lookup fails.

    Args:
        dataset_id: Hub id of the dataset.

    Returns:
        The value of ``datasets.get_dataset_config_names(dataset_id)``, or
        ``None`` when that call raises (for example, the dataset may not exist).
    """
    try:
        return datasets.get_dataset_config_names(dataset_id)
    except Exception as e:
        # Dataset may not exist; log the reason rather than hiding it, as the
        # split lookup in this module does.
        logging.warning(f"Failed to get configs of dataset {dataset_id}: {e}")
        return None
12
+
13
def check_dataset_and_get_split(dataset_id, dataset_config):
    """Return the split names of a dataset config, or ``None`` on failure.

    Args:
        dataset_id: Hub id of the dataset.
        dataset_config: Name of the config whose splits are requested.

    Returns:
        List of split names, or ``None`` when they cannot be resolved (for
        example, the dataset may not exist).
    """
    try:
        # Ask for the split names directly; loading the whole dataset just to
        # read its keys is unnecessary work.
        return list(datasets.get_dataset_split_names(dataset_id, dataset_config))
    except Exception as e:
        # Dataset may not exist or may expose no splits
        logging.warning(f"Failed to get splits of dataset {dataset_id} with config {dataset_config}: {e}")
        return None
utils.py → io_utils.py RENAMED
@@ -17,13 +17,13 @@ def read_scanners(path):
17
 
18
  # convert a list of scanners to yaml file
19
  def write_scanners(scanners):
20
- with open(YAML_PATH, "r") as f:
 
21
  config = yaml.load(f, Loader=yaml.FullLoader)
22
-
23
- config["detectors"] = scanners
24
- with open(YAML_PATH, "w") as f:
25
- # save scanners to detectors in yaml
26
- yaml.dump(config, f, Dumper=Dumper)
27
 
28
  # read model_type from yaml file
29
  def read_inference_type(path):
@@ -35,15 +35,14 @@ def read_inference_type(path):
35
 
36
  # write model_type to yaml file
37
  def write_inference_type(use_inference):
38
- with open(YAML_PATH, "r") as f:
39
  config = yaml.load(f, Loader=yaml.FullLoader)
40
  if use_inference:
41
  config["inference_type"] = 'hf_inference_api'
42
  else:
43
  config["inference_type"] = 'hf_pipeline'
44
- with open(YAML_PATH, "w") as f:
45
- # save inference_type to inference_type in yaml
46
- yaml.dump(config, f, Dumper=Dumper)
47
 
48
  # read column mapping from yaml file
49
  def read_column_mapping(path):
 
17
 
18
  # convert a list of scanners to yaml file
19
  def write_scanners(scanners):
20
+ print(scanners)
21
+ with open(YAML_PATH, "r+") as f:
22
  config = yaml.load(f, Loader=yaml.FullLoader)
23
+ if config:
24
+ config["detectors"] = scanners
25
+ # save scanners to detectors in yaml
26
+ yaml.dump(config, f, Dumper=Dumper)
 
27
 
28
  # read model_type from yaml file
29
  def read_inference_type(path):
 
35
 
36
  # write model_type to yaml file
37
  def write_inference_type(use_inference):
38
+ with open(YAML_PATH, "r+") as f:
39
  config = yaml.load(f, Loader=yaml.FullLoader)
40
  if use_inference:
41
  config["inference_type"] = 'hf_inference_api'
42
  else:
43
  config["inference_type"] = 'hf_pipeline'
44
+ # save inference_type to inference_type in yaml
45
+ yaml.dump(config, f, Dumper=Dumper)
 
46
 
47
  # read column mapping from yaml file
48
  def read_column_mapping(path):
wordings.py CHANGED
@@ -1,10 +1,15 @@
1
- CONFIRM_MAPPING_DETAILS_MD = '''
2
  <h1 style="text-align: center;">
3
- Giskard Evaluator
4
  </h1>
5
  Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
6
  '''
7
-
 
 
 
 
 
8
  CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
9
  <h1 style="text-align: center;">
10
  Confirm Pre-processing Details
 
1
+ INTRODUCTION_MD = '''
2
  <h1 style="text-align: center;">
3
+ 🐢Giskard Evaluator
4
  </h1>
5
  Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
6
  '''
7
+ CONFIRM_MAPPING_DETAILS_MD = '''
8
+ <h1 style="text-align: center;">
9
+ Confirm Pre-processing Details
10
+ </h1>
11
+ Please confirm the pre-processing details below. If you are not sure, please double check your model and dataset.
12
+ '''
13
  CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
14
  <h1 style="text-align: center;">
15
  Confirm Pre-processing Details