Commit 136af2d
Parent(s): df23b1a
GSK-2435-sub-issues (#20)
- GSK-2396 allow edit feature mapping and scan config (b5a969d5c9a59c5d4b224865e42a47352c18e3c3)
- use global queue and fix write configs (55aeb043410733ef4f0a05831b31571e1dd954bc)
Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>
Files changed:
- app_text_classification.py +22 -20
- io_utils.py +32 -47
- pipe.py +4 -0
- run_jobs.py +2 -2
- scan_config.yaml +8 -0
- text_classification.py +5 -1
- text_classification_ui_helpers.py +9 -9
- utils.py +24 -0
app_text_classification.py
CHANGED
@@ -25,8 +25,12 @@ CONFIG_PATH = "./config.yaml"
 
 
 def get_demo(demo):
+    uid = uuid.uuid4()
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
+    uid_label = gr.Textbox(
+        label="Evaluation ID:", value=uid, visible=False, interactive=False
+    )
     with gr.Row():
         model_id_input = gr.Textbox(
             label="Hugging Face model id",
@@ -64,11 +68,11 @@ def get_demo(demo):
 
     with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
-        use_inference = read_inference_type(
+        use_inference = read_inference_type(uid) == "hf_inference_api"
         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
 
     with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
-        selected = read_scanners(
+        selected = read_scanners(uid)
         # currently we remove data_leakage from the default scanners
         # Reason: data_leakage barely raises any issues and takes too many requests
         # when using inference API, causing rate limit error
@@ -86,13 +90,23 @@ def get_demo(demo):
     )
 
     with gr.Row():
-        uid = uuid.uuid4()
-        uid_label = gr.Textbox(
-            label="Evaluation ID:", value=uid, visible=False, interactive=False
-        )
         logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)
         demo.load(get_logs_file, uid_label, logs, every=0.5)
 
+    dataset_id_input.change(
+        check_dataset_and_get_config, inputs=[dataset_id_input, uid_label], outputs=[dataset_config_input]
+    )
+
+    dataset_config_input.change(
+        check_dataset_and_get_split,
+        inputs=[dataset_id_input, dataset_config_input],
+        outputs=[dataset_split_input],
+    )
+
+    scanners.change(write_scanners, inputs=[scanners, uid_label])
+
+    run_inference.change(write_inference_type, inputs=[run_inference, uid_label])
+
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,
@@ -100,6 +114,7 @@ def get_demo(demo):
             dataset_id_input,
             dataset_config_input,
             dataset_split_input,
+            uid_label,
             *column_mappings,
         ],
     )
@@ -107,6 +122,7 @@ def get_demo(demo):
     gr.on(
         triggers=[
             model_id_input.change,
+            dataset_id_input.change,
             dataset_config_input.change,
             dataset_split_input.change,
         ],
@@ -125,20 +141,6 @@ def get_demo(demo):
         ],
     )
 
-    dataset_id_input.blur(
-        check_dataset_and_get_config, dataset_id_input, dataset_config_input
-    )
-
-    dataset_config_input.change(
-        check_dataset_and_get_split,
-        inputs=[dataset_id_input, dataset_config_input],
-        outputs=[dataset_split_input],
-    )
-
-    scanners.change(write_scanners, inputs=scanners)
-
-    run_inference.change(write_inference_type, inputs=[run_inference])
-
     gr.on(
         triggers=[
             run_btn.click,
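For reference, a minimal sketch (not part of this commit; the handler name is hypothetical) of the pattern the UI moves to above: a hidden gr.Textbox carries the per-session uid, and it is passed as an extra input to each event handler so the callback knows which per-user config file to touch.

import uuid
import gradio as gr

def write_user_config(selected, uid):
    # stand-in for io_utils.write_scanners(selected, uid)
    print(f"session {uid}: selected {selected}")

with gr.Blocks() as demo:
    # hidden component that holds the session id, like uid_label in the diff
    uid_label = gr.Textbox(value=str(uuid.uuid4()), visible=False, interactive=False)
    scanners = gr.CheckboxGroup(choices=["robustness", "performance"], label="Scanners")
    # the hidden uid component rides along as an input to the callback
    scanners.change(write_user_config, inputs=[scanners, uid_label])

demo.launch()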
io_utils.py
CHANGED
@@ -1,50 +1,56 @@
 import os
 import subprocess
-
+import pipe
 import yaml
 
-YAML_PATH = "./
-PIPE_PATH = "./tmp/pipe"
-
+YAML_PATH = "./configs"
 
 class Dumper(yaml.Dumper):
     def increase_indent(self, flow=False, *args, **kwargs):
         return super().increase_indent(flow=flow, indentless=False)
 
+def get_yaml_path(uid):
+    if not os.path.exists(YAML_PATH):
+        os.makedirs(YAML_PATH)
+    if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
+        os.system(f"cp {YAML_PATH}/config.yaml {YAML_PATH}/{uid}_config.yaml")
+    return f"{YAML_PATH}/{uid}_config.yaml"
 
 # read scanners from yaml file
 # return a list of scanners
-def read_scanners(
+def read_scanners(uid):
     scanners = []
-    with open(
+    with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         scanners = config.get("detectors", [])
+    f.close()
     return scanners
 
 
 # convert a list of scanners to yaml file
-def write_scanners(scanners):
-
-    with open(YAML_PATH, "r+") as f:
+def write_scanners(scanners, uid):
+    with open(get_yaml_path(uid), "r+") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             config["detectors"] = scanners
         # save scanners to detectors in yaml
         yaml.dump(config, f, Dumper=Dumper)
+    f.close()
 
 
 # read model_type from yaml file
-def read_inference_type(
+def read_inference_type(uid):
     inference_type = ""
-    with open(
+    with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         inference_type = config.get("inference_type", "")
+    f.close()
     return inference_type
 
 
 # write model_type to yaml file
-def write_inference_type(use_inference):
-    with open(
+def write_inference_type(use_inference, uid):
+    with open(get_yaml_path(uid), "r+") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         if use_inference:
             config["inference_type"] = "hf_inference_api"
@@ -52,31 +58,34 @@ def write_inference_type(use_inference):
         config["inference_type"] = "hf_pipeline"
         # save inference_type to inference_type in yaml
         yaml.dump(config, f, Dumper=Dumper)
-
+    f.close()
 
 # read column mapping from yaml file
-def read_column_mapping(
+def read_column_mapping(uid):
     column_mapping = {}
-    with open(
+    with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             column_mapping = config.get("column_mapping", dict())
+    f.close()
     return column_mapping
 
 
 # write column mapping to yaml file
-def write_column_mapping(mapping):
-    with open(
+def write_column_mapping(mapping, uid):
+    with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+    f.close()
     if config is None:
         return
     if mapping is None and "column_mapping" in config.keys():
         del config["column_mapping"]
     else:
         config["column_mapping"] = mapping
-    with open(
+    with open(get_yaml_path(uid), "w") as f:
         # save column_mapping to column_mapping in yaml
         yaml.dump(config, f, Dumper=Dumper)
+    f.close()
 
 
 # convert column mapping dataframe to json
@@ -102,39 +111,15 @@ def write_log_to_user_file(id, log):
 
 
 def save_job_to_pipe(id, job, lock):
-    if not os.path.exists("./tmp"):
-        os.makedirs("./tmp")
-    job = [str(i) for i in job]
-    job = ",".join(job)
-    print(job)
     with lock:
-
-        # write each element in job
-        f.write(f"{id}@{job}\n")
-
+        pipe.jobs.append((id, job))
 
 def pop_job_from_pipe():
-    if
+    if len(pipe.jobs) == 0:
         return
-
-    job = f.readline().strip()
-    remaining = f.readlines()
-    f.close()
-    with open(PIPE_PATH, "w") as f:
-        f.write("\n".join(remaining))
-    f.close()
-    if len(job) == 0:
-        return
-    job_info = job.split("\n")[0].split("@")
-    if len(job_info) != 2:
-        raise ValueError("Invalid job info: ", job_info)
-
+    job_info = pipe.jobs.pop()
     write_log_to_user_file(job_info[0], f"Running job id {job_info[0]}\n")
-    command = job_info[1]
-    masked_command = command.copy()
-    hf_token_index = masked_command.index("--hf_token")
-    masked_command[hf_token_index + 1] = "hf_********"
-    write_log_to_user_file(job_info[0], f"Running command {masked_command}\n")
+    command = job_info[1]
 
     log_file = open(f"./tmp/{job_info[0]}_log", "a")
     subprocess.Popen(
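The io_utils changes all funnel through get_yaml_path: each session gets its own copy of the base config under ./configs, and every reader and writer resolves that per-uid path first. A rough equivalent of the flow, sketched with shutil instead of the os.system("cp ...") call used in the diff, and assuming a ./configs/config.yaml template exists:

import os
import shutil
import yaml

YAML_PATH = "./configs"

def get_yaml_path(uid):
    # create ./configs and a per-user copy of the template on first access
    os.makedirs(YAML_PATH, exist_ok=True)
    user_cfg = os.path.join(YAML_PATH, f"{uid}_config.yaml")
    if not os.path.exists(user_cfg):
        shutil.copyfile(os.path.join(YAML_PATH, "config.yaml"), user_cfg)
    return user_cfg

def read_scanners(uid):
    # every read targets the uid-specific file, never a shared one
    with open(get_yaml_path(uid), "r") as f:
        config = yaml.safe_load(f) or {}
    return config.get("detectors", [])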
pipe.py
ADDED
@@ -0,0 +1,4 @@
+
+def init():
+    global jobs
+    jobs = list()
run_jobs.py
CHANGED
@@ -1,6 +1,6 @@
 import threading
 import time
-
+import pipe
 from io_utils import pop_job_from_pipe
 
 
@@ -11,6 +11,7 @@ def start_process_run_job():
         thread = threading.Thread(target=run_job)
         thread.daemon = True
         thread.do_run = True
+        pipe.init()
         thread.start()
 
     except Exception as e:
@@ -24,7 +25,6 @@ def stop_thread():
 
 def run_job():
     while True:
-        print(thread.do_run)
         try:
             pop_job_from_pipe()
             time.sleep(10)
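Between pipe.py, io_utils.py, and run_jobs.py, the old ./tmp/pipe file is replaced by an in-process global list: the Gradio handler appends (id, command) tuples under a lock, and the daemon thread started in start_process_run_job polls the list and pops jobs to run. A self-contained sketch of that producer/consumer loop (the local jobs list here stands in for pipe.jobs):

import threading
import time

jobs = []  # stands in for pipe.jobs, the process-global queue created by pipe.init()

def save_job_to_pipe(job_id, command, lock):
    # producer (Gradio handler): append the job under a lock
    with lock:
        jobs.append((job_id, command))

def run_job_loop():
    # consumer (daemon thread): poll the shared list and run jobs
    while True:
        if jobs:
            job_id, command = jobs.pop()  # list.pop() takes the most recently added job
            print(f"running job {job_id}: {command}")
        time.sleep(1)

lock = threading.Lock()
threading.Thread(target=run_job_loop, daemon=True).start()
save_job_to_pipe("demo-uid", ["echo", "hello"], lock)
time.sleep(2)  # give the worker a moment to pick the job up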
scan_config.yaml
ADDED
@@ -0,0 +1,8 @@
+detectors:
+  - ethical_bias
+  - text_perturbation
+  - robustness
+  - performance
+  - underconfidence
+  - overconfidence
+  - spurious_correlation
text_classification.py
CHANGED
@@ -8,6 +8,10 @@ from transformers import pipeline
 
 
 def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
+    if not dataset_config:
+        dataset_config = 'default'
+    if not split:
+        split = 'train'
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
         dataset_features = ds.features
@@ -50,7 +54,7 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
-
+
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:
text_classification_ui_helpers.py
CHANGED
@@ -27,12 +27,10 @@ MAX_FEATURES = 20
 HF_REPO_ID = "HF_REPO_ID"
 HF_SPACE_ID = "SPACE_ID"
 HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
-CONFIG_PATH = "./config.yaml"
 
-
-def check_dataset_and_get_config(dataset_id):
+def check_dataset_and_get_config(dataset_id, uid):
     try:
-        write_column_mapping(None)
+        write_column_mapping(None, uid)  # reset column mapping
         configs = datasets.get_dataset_config_names(dataset_id)
         return gr.Dropdown(configs, value=configs[0], visible=True)
     except Exception:
@@ -50,14 +48,16 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
         pass
 
 
-def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
-
+def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, uid, *labels):
+    # TODO: Substitute 'text' with more features for zero-shot
+    # we are not using ds features because we only support "text" for now
+    ds_labels, _ = get_labels_and_features_from_dataset(
         dataset_id, dataset_config, dataset_split
     )
     if labels is None:
         return
     labels = [*labels]
-    all_mappings = read_column_mapping(
+    all_mappings = read_column_mapping(uid)
 
     if all_mappings is None:
         all_mappings = dict()
@@ -73,7 +73,7 @@ def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *l
         if feat:
             # TODO: Substitute 'text' with more features for zero-shot
             all_mappings["features"]["text"] = feat
-    write_column_mapping(all_mappings)
+    write_column_mapping(all_mappings, uid)
 
 
 def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
@@ -178,7 +178,7 @@ def check_model_and_show_prediction(
 
 
 def try_submit(m_id, d_id, config, split, local, uid):
-    all_mappings = read_column_mapping(
+    all_mappings = read_column_mapping(uid)
 
     if all_mappings is None:
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
utils.py
ADDED
@@ -0,0 +1,24 @@
+import yaml
+import sys
+# read scanners from yaml file
+# return a list of scanners
+def read_scanners(path):
+    scanners = []
+    with open(path, "r") as f:
+        config = yaml.load(f, Loader=yaml.FullLoader)
+        scanners = config.get("detectors", None)
+    return scanners
+
+# convert a list of scanners to yaml file
+def write_scanners(scanners):
+    with open("./scan_config.yaml", "w") as f:
+        # save scanners to detectors in yaml
+        yaml.dump({"detectors": scanners}, f)
+
+# convert column mapping dataframe to json
+def convert_column_mapping_to_json(df, label=""):
+    column_mapping = {}
+    column_mapping[label] = []
+    for _, row in df.iterrows():
+        column_mapping[label].append(row.tolist())
+    return column_mapping