Spaces:

amsterdamNLP
/

attention-rollout

Running

Martijn van Beers committed on Sep 21, 2022

Commit

9d1fa85

1 Parent(s): 6c01ee5

Clean up code

* separate out the code for the two methods
* use gradio Blocks instead of Interface for flexibility
* add a markdown file for a note on explainability models and their
limitations, filled with a placeholder for now

Files changed (6) hide show

app.py +33 -271
description.md +2 -2
lib/gradient_rollout.py +112 -0
lib/integrated_gradients.py +90 -0
lib/util.py +86 -0
notice.md +1 -0

app.py CHANGED Viewed

@@ -1,291 +1,53 @@
 import sys
 import pandas
 import gradio
 sys.path.append("lib")
 import torch
 from transformers import AutoModelForSequenceClassification
-from BERT_explainability.ExplanationGenerator import Generator
-from BERT_explainability.roberta2 import RobertaForSequenceClassification
 from transformers import AutoTokenizer
 from captum.attr import LayerIntegratedGradients
 from captum.attr import visualization
 import torch
-# from https://discuss.pytorch.org/t/using-scikit-learns-scalers-for-torchvision/53455
-class PyTMinMaxScalerVectorized(object):
-    """
-    Transforms each channel to the range [0, 1].
-    """
-    def __init__(self, dimension=-1):
-        self.d = dimension
-    def __call__(self, tensor):
-        d = self.d
-        scale = 1.0 / (
-            tensor.max(dim=d, keepdim=True)[0] - tensor.min(dim=d, keepdim=True)[0]
-        )
-        tensor.mul_(scale).sub_(tensor.min(dim=d, keepdim=True)[0])
-        return tensor
-if torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
-model = RobertaForSequenceClassification.from_pretrained(
-    "textattack/roberta-base-SST-2"
-).to(device)
-model.eval()
-model2 = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2")
-tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
-# initialize the explanations generator
-explanations = Generator(model, "roberta")
-classifications = ["NEGATIVE", "POSITIVE"]
-# rule 5 from paper
-def avg_heads(cam, grad):
-    cam = (grad * cam).clamp(min=0).mean(dim=-3)
-    # set negative values to 0, then average
-    #    cam = cam.clamp(min=0).mean(dim=0)
-    return cam
-# rule 6 from paper
-def apply_self_attention_rules(R_ss, cam_ss):
-    R_ss_addition = torch.matmul(cam_ss, R_ss)
-    return R_ss_addition
-def generate_relevance(model, input_ids, attention_mask, index=None, start_layer=0):
-    output = model(input_ids=input_ids, attention_mask=attention_mask)[0]
-    if index == None:
-        # index = np.expand_dims(np.arange(input_ids.shape[1])
-        # by default explain the class with the highest score
-        index = output.argmax(axis=-1).detach().cpu().numpy()
-    # create a one-hot vector selecting class we want explanations for
-    one_hot = (
-        torch.nn.functional.one_hot(
-            torch.tensor(index, dtype=torch.int64), num_classes=output.size(-1)
-        )
-        .to(torch.float)
-        .requires_grad_(True)
-    ).to(device)
-    one_hot = torch.sum(one_hot * output)
-    model.zero_grad()
-    # create the gradients for the class we're interested in
-    one_hot.backward(retain_graph=True)
-    num_tokens = model.roberta.encoder.layer[0].attention.self.get_attn().shape[-1]
-    R = torch.eye(num_tokens).expand(output.size(0), -1, -1).clone().to(device)
-    for i, blk in enumerate(model.roberta.encoder.layer):
-        if i < start_layer:
-            continue
-        grad = blk.attention.self.get_attn_gradients()
-        cam = blk.attention.self.get_attn()
-        cam = avg_heads(cam, grad)
-        joint = apply_self_attention_rules(R, cam)
-        R += joint
-    return output, R[:, 0, 1:-1]
-def visualize_text(datarecords, legend=True):
-    dom = ["<table width: 100%>"]
-    rows = [
-        "<tr><th>True Label</th>"
-        "<th>Predicted Label</th>"
-        "<th>Attribution Label</th>"
-        "<th>Attribution Score</th>"
-        "<th>Word Importance</th>"
-    ]
-    for datarecord in datarecords:
-        rows.append(
-            "".join(
-                [
-                    "<tr>",
-                    visualization.format_classname(datarecord.true_class),
-                    visualization.format_classname(
-                        "{0} ({1:.2f})".format(
-                            datarecord.pred_class, datarecord.pred_prob
-                        )
-                    ),
-                    visualization.format_classname(datarecord.attr_class),
-                    visualization.format_classname(
-                        "{0:.2f}".format(datarecord.attr_score)
-                    ),
-                    visualization.format_word_importances(
-                        datarecord.raw_input_ids, datarecord.word_attributions
-                    ),
-                    "<tr>",
-                ]
-            )
-        )
-    if legend:
-        dom.append(
-            '<div style="border-top: 1px solid; margin-top: 5px; \
-            padding-top: 5px; display: inline-block">'
-        )
-        dom.append("<b>Legend: </b>")
-        for value, label in zip([-1, 0, 1], ["Negative", "Neutral", "Positive"]):
-            dom.append(
-                '<span style="display: inline-block; width: 10px; height: 10px; \
-                border: 1px solid; background-color: \
-                {value}"></span> {label}  '.format(
-                    value=visualization._get_color(value), label=label
-                )
-            )
-        dom.append("</div>")
-    dom.append("".join(rows))
-    dom.append("</table>")
-    html = "".join(dom)
-    return html
-def show_explanation(model, input_ids, attention_mask, index=None, start_layer=8):
-    # generate an explanation for the input
-    output, expl = generate_relevance(
-        model, input_ids, attention_mask, index=index, start_layer=start_layer
-    )
-    # normalize scores
-    scaler = PyTMinMaxScalerVectorized()
-    norm = scaler(expl)
-    # get the model classification
-    output = torch.nn.functional.softmax(output, dim=-1)
-    vis_data_records = []
-    for record in range(input_ids.size(0)):
-        classification = output[record].argmax(dim=-1).item()
-        class_name = classifications[classification]
-        nrm = norm[record]
-        # if the classification is negative, higher explanation scores are more negative
-        # flip for visualization
-        if class_name == "NEGATIVE":
-            nrm *= -1
-        tokens = tokenizer.convert_ids_to_tokens(input_ids[record].flatten())[
-            1 : 0 - ((attention_mask[record] == 0).sum().item() + 1)
-        ]
-#        vis_data_records.append(list(zip(tokens, nrm.tolist())))
-        vis_data_records.append(
-            visualization.VisualizationDataRecord(
-                nrm,
-                output[record][classification],
-                classification,
-                classification,
-                index,
-                1,
-                tokens,
-                1,
-            )
-        )
-    return visualize_text(vis_data_records)
-def custom_forward(inputs, attention_mask=None, pos=0):
-    result = model2(inputs, attention_mask=attention_mask, return_dict=True)
-    preds = result.logits
-    return preds
-def summarize_attributions(attributions):
-    attributions = attributions.sum(dim=-1).squeeze(0)
-    attributions = attributions / torch.norm(attributions)
-    return attributions
-def run_attribution_model(input_ids, attention_mask, ref_token_id=tokenizer.unk_token_id, layer=None, steps=20):
-    try:
-        output = model2(input_ids=input_ids, attention_mask=attention_mask)[0]
-        index = output.argmax(axis=-1).detach().cpu().numpy()
-        ablator = LayerIntegratedGradients(custom_forward, layer)
-        input_tensor = input_ids
-        attention_mask = attention_mask
-        attributions = ablator.attribute(
-                inputs=input_ids,
-                baselines=ref_token_id,
-                additional_forward_args=(attention_mask),
-                target=1,
-                n_steps=steps,
-        )
-        attributions = summarize_attributions(attributions).unsqueeze_(0)
-    finally:
-        pass
-    vis_data_records = []
-    for record in range(input_ids.size(0)):
-        classification = output[record].argmax(dim=-1).item()
-        class_name = classifications[classification]
-        attr = attributions[record]
-        tokens = tokenizer.convert_ids_to_tokens(input_ids[record].flatten())[
-            1 : 0 - ((attention_mask[record] == 0).sum().item() + 1)
-        ]
-        vis_data_records.append(
-            visualization.VisualizationDataRecord(
-                attr,
-                output[record][classification],
-                classification,
-                classification,
-                index,
-                1,
-                tokens,
-                1,
-            )
-        )
-    return visualize_text(vis_data_records)
-def sentence_sentiment(input_text, layer):
-    text_batch = [input_text]
-    encoding = tokenizer(text_batch, return_tensors="pt")
-    input_ids = encoding["input_ids"].to(device)
-    attention_mask = encoding["attention_mask"].to(device)
-    layer = int(layer)
-    if layer == 0:
-        layer = model2.roberta.embeddings
-    else:
-        layer = getattr(model2.roberta.encoder.layer, str(layer-1))
-    output = run_attribution_model(input_ids, attention_mask, layer=layer)
-    return output
-def sentiment_explanation_hila(input_text, layer):
-    text_batch = [input_text]
-    encoding = tokenizer(text_batch, return_tensors="pt")
-    input_ids = encoding["input_ids"].to(device)
-    attention_mask = encoding["attention_mask"].to(device)
-    # true class is positive - 1
-    true_class = 1
-    return show_explanation(model, input_ids, attention_mask, start_layer=int(layer))
-layer_slider = gradio.Slider(minimum=0, maximum=12, value=8, step=1, label="Select layer")
-hila = gradio.Interface(
-    fn=sentiment_explanation_hila,
-    inputs=["text", layer_slider],
-    outputs="html",
-)
-# layer_slider2 = gradio.Slider(minimum=0, maximum=12, value=0, step=1, label="Select IG layer")
-lig = gradio.Interface(
-    fn=sentence_sentiment,
-    inputs=["text", layer_slider],
-    outputs="html",
-)
-with open("description.md", "r") as fh:
-    description = fh.read()
 examples = pandas.read_csv("examples.csv").to_numpy().tolist()
-iface = gradio.Parallel(hila, lig, title="RoBERTa Explainability", description=description, examples=examples)
 iface.launch()

 import sys
 import pandas
 import gradio
+import pathlib
 sys.path.append("lib")
 import torch
+from roberta2 import RobertaForSequenceClassification
+from gradient_rollout import GradientRolloutExplainer
+from integrated_gradients import IntegratedGradientsExplainer
 from transformers import AutoModelForSequenceClassification
 from transformers import AutoTokenizer
 from captum.attr import LayerIntegratedGradients
 from captum.attr import visualization
+import util
 import torch
+# Build both explainers once at import time; each constructor loads its own
+# copy of the model and tokenizer, and the instances are reused for every request.
+ig_explainer = IntegratedGradientsExplainer()
+gr_explainer = GradientRolloutExplainer()
+def run(sent, rollout, ig):
+    a = gr_explainer(sent, rollout)
+    b = ig_explainer(sent, ig)
+    return a, b
 examples = pandas.read_csv("examples.csv").to_numpy().tolist()
+# Page layout: description, sentence input with submit button, one result
+# column per explanation method, example sentences, and a collapsible note.
+with gradio.Blocks(title="Explanations with attention rollout") as iface:
+    util.Markdown(pathlib.Path("description.md"))
+    with gradio.Row(equal_height=True):
+        with gradio.Column(scale=4):
+            sent = gradio.Textbox(label="Input sentence")
+        with gradio.Column(scale=1):
+            but = gradio.Button("Submit")
+    with gradio.Row(equal_height=True):
+        # Left column: gradient-weighted attention rollout.
+        with gradio.Column():
+            rollout_layer = gradio.Slider(minimum=0, maximum=12, value=8, step=1, label="Select rollout start layer")
+            rollout_result = gradio.HTML()
+        # Right column: layer integrated gradients.
+        with gradio.Column():
+            ig_layer = gradio.Slider(minimum=0, maximum=12, value=8, step=1, label="Select IG layer")
+            ig_result = gradio.HTML()
+    # The example rows are wired to the sentence textbox only.
+    gradio.Examples(examples, [sent])
+    with gradio.Accordion("A note about explainability models"):
+        util.Markdown(pathlib.Path("notice.md"))
+    # Changing a slider re-runs only that column's explainer on the current sentence.
+    rollout_layer.change(gr_explainer, [sent, rollout_layer], rollout_result)
+    ig_layer.change(ig_explainer, [sent, ig_layer], ig_result)
+    # The button runs both explainers and fills both result panes.
+    but.click(run, [sent, rollout_layer, ig_layer], [rollout_result, ig_result])
 iface.launch()

description.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# RoBERTa Explainability
 In this demo, we use the RoBERTa language model (optimized for masked language modelling and finetuned for sentiment analysis).
 The model predicts for a given sentence whether it expresses a positive, negative or neutral sentiment.
@@ -7,7 +7,7 @@ A range of so-called "attribution methods" have been developed that attempt to d
 they provide a very limited form of "explanation" -- and often disagree -- but sometimes provide good initial hypotheses nevertheless that can be further explored with other methods.
 Abnar & Zuidema (2020) proposed a method for Transformers called "Attention Rollout", which was further refined by Chefer et al. (2021) into Gradient-weighted Rollout.
-Here we compare it to another popular method called Integrated Gradient.
 * Gradient-weighted attention rollout, as defined by [Hila Chefer](https://github.com/hila-chefer)
   [(Transformer-MM_explainability)](https://github.com/hila-chefer/Transformer-MM-Explainability/), with rollout recursion up to the selected layer

+# Attention Rollout -- RoBERTa
 In this demo, we use the RoBERTa language model (optimized for masked language modelling and finetuned for sentiment analysis).
 The model predicts for a given sentence whether it expresses a positive, negative or neutral sentiment.
 they provide a very limited form of "explanation" -- and often disagree -- but sometimes provide good initial hypotheses nevertheless that can be further explored with other methods.
 Abnar & Zuidema (2020) proposed a method for Transformers called "Attention Rollout", which was further refined by Chefer et al. (2021) into Gradient-weighted Rollout.
+Here we compare it to another popular method called Integrated Gradients.
 * Gradient-weighted attention rollout, as defined by [Hila Chefer](https://github.com/hila-chefer)
   [(Transformer-MM_explainability)](https://github.com/hila-chefer/Transformer-MM-Explainability/), with rollout recursion up to the selected layer

lib/gradient_rollout.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import torch
+from transformers import AutoTokenizer
+from captum.attr import visualization
+from roberta2 import RobertaForSequenceClassification
+from util import visualize_text, PyTMinMaxScalerVectorized
+# Class names indexed by the model's output index: 0 is NEGATIVE, 1 is POSITIVE.
+classifications = ["NEGATIVE", "POSITIVE"]
+class GradientRolloutExplainer:
+    def __init__(self):
+        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+        self.model = RobertaForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(self.device)
+        self.model.eval()
+        self.tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
+    def tokens_from_ids(self, ids):
+        return list(map(lambda s: s[1:] if s[0] == "Ġ" else s, self.tokenizer.convert_ids_to_tokens(ids)))
+    def run_attribution_model(self, input_ids, attention_mask, index=None, start_layer=0):
+        def avg_heads(cam, grad):
+            cam = (grad * cam).clamp(min=0).mean(dim=-3)
+            # set negative values to 0, then average
+            #    cam = cam.clamp(min=0).mean(dim=0)
+            return cam
+        def apply_self_attention_rules(R_ss, cam_ss):
+            R_ss_addition = torch.matmul(cam_ss, R_ss)
+            return R_ss_addition
+        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
+        if index == None:
+            # index = np.expand_dims(np.arange(input_ids.shape[1])
+            # by default explain the class with the highest score
+            index = output.argmax(axis=-1).detach().cpu().numpy()
+        # create a one-hot vector selecting class we want explanations for
+        one_hot = (
+            torch.nn.functional.one_hot(
+                torch.tensor(index, dtype=torch.int64), num_classes=output.size(-1)
+            )
+            .to(torch.float)
+            .requires_grad_(True)
+        ).to(self.device)
+        one_hot = torch.sum(one_hot * output)
+        self.model.zero_grad()
+        # create the gradients for the class we're interested in
+        one_hot.backward(retain_graph=True)
+        num_tokens = self.model.roberta.encoder.layer[0].attention.self.get_attn().shape[-1]
+        R = torch.eye(num_tokens).expand(output.size(0), -1, -1).clone().to(self.device)
+        for i, blk in enumerate(self.model.roberta.encoder.layer):
+            if i < start_layer:
+                continue
+            grad = blk.attention.self.get_attn_gradients()
+            cam = blk.attention.self.get_attn()
+            cam = avg_heads(cam, grad)
+            joint = apply_self_attention_rules(R, cam)
+            R += joint
+        return output, R[:, 0, 1:-1]
+    def build_visualization(self, input_ids, attention_mask, index=None, start_layer=8):
+        # generate an explanation for the input
+        vis_data_records = []
+        for index in range(2):
+            output, expl = self.run_attribution_model(
+                input_ids, attention_mask, index=index, start_layer=start_layer
+            )
+            # normalize scores
+            scaler = PyTMinMaxScalerVectorized()
+            norm = scaler(expl)
+            # get the model classification
+            output = torch.nn.functional.softmax(output, dim=-1)
+            for record in range(input_ids.size(0)):
+                classification = output[record].argmax(dim=-1).item()
+                class_name = classifications[classification]
+                nrm = norm[record]
+                # if the classification is negative, higher explanation scores are more negative
+                # flip for visualization
+                #if class_name == "NEGATIVE":
+                if index == 0:
+                    nrm *= -1
+                tokens = self.tokens_from_ids(input_ids[record].flatten())[
+                    1 : 0 - ((attention_mask[record] == 0).sum().item() + 1)
+                ]
+                vis_data_records.append(
+                    visualization.VisualizationDataRecord(
+                        nrm,
+                        output[record][classification],
+                        classification,
+                        classification,
+                        index,
+                        1,
+                        tokens,
+                        1,
+                    )
+                )
+        return visualize_text(vis_data_records)
+    def __call__(self, input_text, start_layer=8):
+        text_batch = [input_text]
+        encoding = self.tokenizer(text_batch, return_tensors="pt")
+        input_ids = encoding["input_ids"].to(self.device)
+        attention_mask = encoding["attention_mask"].to(self.device)
+        return self.build_visualization(input_ids, attention_mask, start_layer=int(start_layer))

lib/integrated_gradients.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import torch
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer
+from captum.attr import LayerIntegratedGradients
+from captum.attr import visualization
+from util import visualize_text
+# Class names indexed by the model's output index: 0 is NEGATIVE, 1 is POSITIVE.
+classifications = ["NEGATIVE", "POSITIVE"]
+class IntegratedGradientsExplainer:
+    def __init__(self):
+        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+        self.model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(self.device)
+        self.tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
+        self.ref_token_id = self.tokenizer.unk_token_id
+    def tokens_from_ids(self, ids):
+        return list(map(lambda s: s[1:] if s[0] == "Ġ" else s, self.tokenizer.convert_ids_to_tokens(ids)))
+    def custom_forward(self, inputs, attention_mask=None, pos=0):
+        result = self.model(inputs, attention_mask=attention_mask, return_dict=True)
+        preds = result.logits
+        return preds
+    @staticmethod
+    def summarize_attributions(attributions):
+        attributions = attributions.sum(dim=-1).squeeze(0)
+        attributions = attributions / torch.norm(attributions)
+        return attributions
+    def run_attribution_model(self, input_ids, attention_mask, index=None, layer=None, steps=20):
+        try:
+            output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
+            if index is None:
+                index = output.argmax(axis=-1).item()
+            ablator = LayerIntegratedGradients(self.custom_forward, layer)
+            input_tensor = input_ids
+            attention_mask = attention_mask
+            attributions = ablator.attribute(
+                    inputs=input_ids,
+                    baselines=self.ref_token_id,
+                    additional_forward_args=(attention_mask),
+                    target=index,
+                    n_steps=steps,
+            )
+            return self.summarize_attributions(attributions).unsqueeze_(0), output, index
+        finally:
+            pass
+    def build_visualization(self, input_ids, attention_mask, **kwargs):
+        vis_data_records = []
+        attributions, output, index = self.run_attribution_model(input_ids, attention_mask, **kwargs)
+        for record in range(input_ids.size(0)):
+            classification = output[record].argmax(dim=-1).item()
+            class_name = classifications[classification]
+            attr = attributions[record]
+            tokens = self.tokens_from_ids(input_ids[record].flatten())[
+                1 : 0 - ((attention_mask[record] == 0).sum().item() + 1)
+            ]
+            vis_data_records.append(
+                visualization.VisualizationDataRecord(
+                    attr,
+                    output[record][classification],
+                    classification,
+                    classification,
+                    index,
+                    1,
+                    tokens,
+                    1,
+                )
+            )
+        return visualize_text(vis_data_records)
+    def __call__(self, input_text, layer):
+        text_batch = [input_text]
+        encoding = self.tokenizer(text_batch, return_tensors="pt")
+        input_ids = encoding["input_ids"].to(self.device)
+        attention_mask = encoding["attention_mask"].to(self.device)
+        layer = int(layer)
+        if layer == 0:
+            layer = self.model.roberta.embeddings
+        else:
+            layer = getattr(self.model.roberta.encoder.layer, str(layer-1))
+        return self.build_visualization(input_ids, attention_mask, layer=layer)

lib/util.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import io
+import pathlib
+import gradio
+from captum.attr import visualization
+class Markdown(gradio.Markdown):
+    def __init__(self, value, *args, **kwargs):
+        if isinstance(value, pathlib.Path):
+            value = value.read_text()
+        elif isinstance(value, io.TextIOWrapper):
+            value = value.read()
+        super().__init__(value, *args, **kwargs)
+# from https://discuss.pytorch.org/t/using-scikit-learns-scalers-for-torchvision/53455
+class PyTMinMaxScalerVectorized(object):
+    """
+    Transforms each channel to the range [0, 1].
+    """
+    def __init__(self, dimension=-1):
+        self.d = dimension
+    def __call__(self, tensor):
+        d = self.d
+        scale = 1.0 / (
+            tensor.max(dim=d, keepdim=True)[0] - tensor.min(dim=d, keepdim=True)[0]
+        )
+        tensor.mul_(scale).sub_(tensor.min(dim=d, keepdim=True)[0])
+        return tensor
+# copied out of captum because we need raw html instead of a jupyter widget
+def visualize_text(datarecords, legend=True):
+    dom = ["<table width: 100%>"]
+    rows = [
+        "<tr><th>True Label</th>"
+        "<th>Predicted Label</th>"
+        "<th>Attribution Label</th>"
+        "<th>Attribution Score</th>"
+        "<th>Word Importance</th>"
+    ]
+    for datarecord in datarecords:
+        rows.append(
+            "".join(
+                [
+                    "<tr>",
+                    visualization.format_classname(datarecord.true_class),
+                    visualization.format_classname(
+                        "{0} ({1:.2f})".format(
+                            datarecord.pred_class, datarecord.pred_prob
+                        )
+                    ),
+                    visualization.format_classname(datarecord.attr_class),
+                    visualization.format_classname(
+                        "{0:.2f}".format(datarecord.attr_score)
+                    ),
+                    visualization.format_word_importances(
+                        datarecord.raw_input_ids, datarecord.word_attributions
+                    ),
+                    "<tr>",
+                ]
+            )
+        )
+    if legend:
+        dom.append(
+            '<div style="border-top: 1px solid; margin-top: 5px; \
+            padding-top: 5px; display: inline-block">'
+        )
+        dom.append("<b>Legend: </b>")
+        for value, label in zip([-1, 0, 1], ["Negative", "Neutral", "Positive"]):
+            dom.append(
+                '<span style="display: inline-block; width: 10px; height: 10px; \
+                border: 1px solid; background-color: \
+                {value}"></span> {label}  '.format(
+                    value=visualization._get_color(value), label=label
+                )
+            )
+        dom.append("</div>")
+    dom.append("".join(rows))
+    dom.append("</table>")
+    html = "".join(dom)
+    return html

notice.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ [placeholder]