<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="description"
content="Demo Page of BEYOND ICML 2024.">
<meta name="keywords" content="BEYOND, Adversarial Examples, Adversarial Detection">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Be Your Own Neighborhood: Detecting Adversarial Examples by the Neighborhood Relations Built on Self-Supervised Learning</title>
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
rel="stylesheet">
<link rel="stylesheet" href="./static/css/bulma.min.css">
<link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
<link rel="stylesheet" href="./static/css/bulma-slider.min.css">
<link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
<link rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="./static/css/index.css">
<link rel="stylesheet" href="./static/css/custom.css">
<link rel="icon" href="./static/images/favicon.svg">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script defer src="./static/js/fontawesome.all.min.js"></script>
<script src="./static/js/bulma-carousel.min.js"></script>
<script src="./static/js/bulma-slider.min.js"></script>
<script src="./static/js/index.js"></script>
<!-- for mathjax support -->
<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<script>
// Toggle between the adaptive-attack loss formulas.
// Bind on DOM ready: this script runs in <head>, before
// #adaptive-loss-formula-list exists, so a bare selector would match nothing.
$(function() {
  $('#adaptive-loss-formula-list').on('click', 'a', function(e) {
    e.preventDefault();
    if (!$(this).hasClass('selected')) {
      $('.formula').hide(200);
      $('.formula-list > a').removeClass('selected');
      $(this).addClass('selected');
      var target = $(this).attr('href');
      $(target).show(200);
    }
  });
});
</script>
<style type="text/css">
.tg {border-collapse:collapse;border-spacing:0;}
.tg td{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
overflow:hidden;padding:10px 5px;word-break:normal;}
.tg th{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
font-weight:normal;overflow:hidden;padding:10px 5px;word-break:normal;}
.tg .tg-baqh{text-align:center;vertical-align:top}
.tg .tg-amwm{font-weight:bold;text-align:center;vertical-align:top}
.tg .tg-2imo{font-style:italic;text-align:center;text-decoration:underline;vertical-align:top}
</style>
</head>
<body>
<section class="hero">
<div class="hero-body">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column has-text-centered">
<h1 class="title is-1 publication-title">Be Your Own Neighborhood: Detecting Adversarial Examples by the Neighborhood Relations Built on Self-Supervised Learning</h1>
<div class="is-size-5 publication-authors">
<span class="author-block">
<a href="#" target="_blank">Zhiyuan He</a><sup>1*</sup>,</span>
<span class="author-block">
<a href="https://yangyijune.github.io/" target="_blank">Yijun Yang</a><sup>1*</sup>,</span>
<span class="author-block">
<a href="https://sites.google.com/site/pinyuchenpage/home" target="_blank">Pin-Yu Chen</a><sup>2</sup>,
</span>
<span class="author-block">
<a href="https://cure-lab.github.io/" target="_blank">Qiang Xu</a><sup>1</sup>,
</span>
<span class="author-block">
<a href="https://tsungyiho.github.io/" target="_blank">Tsung-Yi Ho</a><sup>1</sup>, | |
</span> | |
</div>
<div class="is-size-5 publication-authors">
<span class="author-block"><sup>*</sup>Equal contribution,</span>
<span class="author-block"><sup>1</sup>The Chinese University of Hong Kong,</span>
<span class="author-block"><sup>2</sup>IBM Research</span>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- PDF Link. -->
<span class="link-block">
<a href="https://arxiv.org/abs/2209.00005" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fas fa-file-pdf"></i>
</span>
<span>Paper</span>
</a>
</span>
<span class="link-block">
<a href="https://arxiv.org/abs/2209.00005" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<!-- <section class="hero teaser"> | |
<div class="container is-max-desktop"> | |
<div class="hero-body"> | |
<video id="teaser" autoplay muted loop playsinline height="100%"> | |
<source src="./static/videos/teaser.mp4" | |
type="video/mp4"> | |
</video> | |
<h2 class="subtitle has-text-centered"> | |
<span class="dnerf">Nerfies</span> turns selfie videos from your phone into | |
free-viewpoint | |
portraits. | |
</h2> | |
</div> | |
</div> | |
</section> --> | |
<!-- <section class="hero is-light is-small"> | |
<div class="hero-body"> | |
<div class="container"> | |
<div id="results-carousel" class="carousel results-carousel"> | |
<div class="item item-steve"> | |
<video poster="" id="steve" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/steve.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-chair-tp"> | |
<video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/chair-tp.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-shiba"> | |
<video poster="" id="shiba" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/shiba.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-fullbody"> | |
<video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/fullbody.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-blueshirt"> | |
<video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/blueshirt.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-mask"> | |
<video poster="" id="mask" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/mask.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-coffee"> | |
<video poster="" id="coffee" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/coffee.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
<div class="item item-toby"> | |
<video poster="" id="toby" autoplay controls muted loop playsinline height="100%"> | |
<source src="./static/videos/toby2.mp4" | |
type="video/mp4"> | |
</video> | |
</div> | |
</div> | |
</div> | |
</div> | |
</section> --> | |
<section class="section"> | |
<div class="container is-max-desktop"> | |
<!-- Abstract. --> | |
<div class="columns is-centered has-text-centered"> | |
<div class="column is-four-fifths"> | |
<h2 class="title is-3">Abstract</h2> | |
<div class="content has-text-justified"> | |
<p> | |
Deep Neural Networks (DNNs) have achieved excellent performance in various fields. However, DNNs' vulnerability to
Adversarial Examples (AEs) hinders their deployment in safety-critical applications. In this paper, we present <strong>BEYOND</strong>,
an innovative AE detection framework designed for reliable predictions. BEYOND identifies AEs by distinguishing an AE's
abnormal relations with its augmented versions, i.e., its neighbors, from two perspectives: representation similarity and label
consistency. An off-the-shelf Self-Supervised Learning (SSL) model is used to extract representations and predict labels,
owing to its highly informative representation capacity compared to supervised learning models. We found that clean samples
maintain a high degree of representation similarity and label consistency relative to their neighbors, whereas AEs
exhibit significant discrepancies. We explain this observation and show that, by leveraging this discrepancy, BEYOND can
accurately detect AEs. Additionally, we develop a rigorous justification for the effectiveness of BEYOND. Furthermore, as a
plug-and-play model, BEYOND can easily cooperate with an Adversarially Trained Classifier (ATC), achieving state-of-the-art
(SOTA) robust accuracy. Experimental results show that BEYOND outperforms baselines by a large margin, especially under
adaptive attacks. Empowered by the robust relations built on the SSL model, BEYOND outperforms baselines in terms
of both detection ability and speed.
</div> | |
</div> | |
</div> | |
<!--/ Abstract. --> | |
</div> | |
</section> | |
<!-- Relations --> | |
<section class="section"> | |
<div class="container is-max-desktop"> | |
<h2 class="title is-3">Neighborhood Relations of Benign Examples and AEs</h2> | |
<div class="columns is-centered"> | |
<div class="column container-centered is-four-fifths"> | |
<img src="./static/images/relations.jpg" alt="Neighborhood Relations of Benign Examples and AEs"/> | |
</div> | |
</div> | |
<div class="columns is-centered"> | |
<div class="column has-text-justified is-four-fifths"> | |
<p> | |
<strong>Figure 1. Neighborhood Relations of Benign Examples and AEs.</strong> | |
</p> | |
</div> | |
</div> | |
<div class="columns is-centered"> | |
<div class="column has-text-justified"> | |
<p>
The Latent Neighborhood Graph (LNG) represents the relationship between an input sample and reference samples as a graph:
its nodes are embeddings extracted by a DNN, its edges are built according to the distances between the input node and the
reference nodes, and a graph neural network is trained on this graph to detect AEs.
</p>
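<p>
For intuition, here is a minimal Python sketch of how such a latent neighborhood graph could be assembled. The function name
and the k-nearest-neighbor edge rule are illustrative assumptions, not LNG's exact construction.
</p>
<pre><code>import torch

def build_latent_graph(z_input, z_refs, k=10):
    """Connect the input embedding to its k nearest reference embeddings.

    z_input: (d,) embedding of the input sample (from the DNN).
    z_refs:  (n, d) embeddings of the reference samples.
    Returns node features and an edge list for a GNN-based detector.
    """
    dists = torch.cdist(z_input.unsqueeze(0), z_refs).squeeze(0)   # (n,)
    nbr_idx = dists.topk(k, largest=False).indices                 # k closest refs
    nodes = torch.cat([z_input.unsqueeze(0), z_refs[nbr_idx]])     # (k+1, d)
    # Edges: node 0 (the input) connected to each of its k neighbors.
    edges = torch.stack([torch.zeros(k, dtype=torch.long),
                         torch.arange(1, k + 1)])                  # (2, k)
    return nodes, edges
</code></pre>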
</div>
</div>
</div>
</section>
<!-- Relations -->
<!-- Overview -->
<section class="section">
<div class="container is-max-desktop">
<h2 class="title is-3">Method Overview of BEYOND</h2>
<div class="columns is-centered">
<div class="column container-centered">
<img src="./static/images/overview.png" alt="Method Overview of BEYOND"/>
<p><strong>Figure 2. Overview of BEYOND.</strong> First, we augment the input image to obtain a set of its neighbors. Then, the
label consistency mechanism compares the classifier's prediction on the input image with the predictions of its neighbors given by
the SSL model's classification head. Meanwhile, the representation similarity mechanism employs cosine distance to measure the similarity
between the input image and its neighbors. Finally, an input image with poor label consistency or representation similarity is flagged as an AE.</p>
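<p>
The detection rule can be summarized in a few lines of Python. This is a minimal sketch, assuming a hypothetical SSL backbone
split into a representation extractor <code>ssl_repr</code> and a classification head <code>ssl_head</code>, a target
<code>classifier</code>, and a random <code>augment</code> function; all names and thresholds are illustrative, not the
released implementation.
</p>
<pre><code>import torch
import torch.nn.functional as F

def beyond_detect(x, classifier, ssl_repr, ssl_head, augment,
                  k=50, label_thresh=0.8, sim_thresh=0.8):
    """Flag x as adversarial if its augmented neighbors disagree with it."""
    neighbors = torch.stack([augment(x) for _ in range(k)])      # k neighbors

    # Label consistency: fraction of neighbors whose SSL-head prediction
    # matches the classifier's prediction on the original input.
    y_hat = classifier(x.unsqueeze(0)).argmax(dim=1)             # (1,)
    nbr_pred = ssl_head(ssl_repr(neighbors)).argmax(dim=1)       # (k,)
    label_consistency = (nbr_pred == y_hat).float().mean()

    # Representation similarity: mean cosine similarity between the
    # input's SSL representation and those of its neighbors.
    z = ssl_repr(x.unsqueeze(0))                                 # (1, d)
    rep_similarity = F.cosine_similarity(z, ssl_repr(neighbors)).mean()

    # Poor label consistency OR poor representation similarity => AE.
    return (label_consistency &lt; label_thresh) or (rep_similarity &lt; sim_thresh)
</code></pre>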
</div>
</div>
</div>
</section>
<!-- Overview -->
<!-- Results -->
<section class="section">
<div class="container is-max-desktop">
<h2 class="title is-3">Detection Performance</h2>
<div class="columns is-centered">
<div class="column container-centered">
<table class="tg" border="1" style="width:100%;">
<caption><strong>Table 1.</strong> The Area Under the ROC Curve (AUC) of different adversarial detection approaches on CIFAR-10. LNG
is not open-sourced, so its numbers are taken from its paper. To align with the baselines, classifier: ResNet110, FGSM: ε = 0.05, PGD:
ε = 0.02. Note that BEYOND needs no AEs for training, so it scores the same under both the seen and unseen settings. <strong>Bold</strong> values
indicate the best performance, and <u><i>underlined italicized</i></u> values the second-best performance.</caption>
<thead>
<tr>
<th class="tg-amwm" rowspan="2">AUC (%)</th>
<th class="tg-baqh" colspan="4"><span style="font-weight:bold;font-style:italic">Unse</span><span style="font-weight:bold">e</span><span style="font-weight:bold;font-style:italic">n</span><span style="font-weight:bold">: </span>Attacks used in training are preclude from tests</th> | |
<th class="tg-baqh" colspan="5"><span style="font-weight:bold;font-style:italic">Seen</span><span style="font-weight:bold">:</span> Attacks used in training are included in tests</th> | |
</tr> | |
<tr> | |
<th class="tg-baqh">FGSM</th> | |
<th class="tg-baqh">PGD</th> | |
<th class="tg-baqh">AutoAttack</th> | |
<th class="tg-baqh">Square</th> | |
<th class="tg-baqh">FGSM</th> | |
<th class="tg-baqh">PGD</th> | |
<th class="tg-baqh">CW</th> | |
<th class="tg-baqh">AutoAttack</th> | |
<th class="tg-baqh">Square</th> | |
</tr> | |
</thead> | |
<tbody> | |
<tr> | |
<td class="tg-baqh">DkNN</td> | |
<td class="tg-baqh">61.55</td> | |
<td class="tg-baqh">51.22</td> | |
<td class="tg-baqh">52.12</td> | |
<td class="tg-baqh">59.46</td> | |
<td class="tg-baqh">61.55</td> | |
<td class="tg-baqh">51.22</td> | |
<td class="tg-baqh">61.52</td> | |
<td class="tg-baqh">52.12</td> | |
<td class="tg-baqh">59.46</td> | |
</tr> | |
<tr> | |
<td class="tg-baqh">kNN</td> | |
<td class="tg-baqh">61.83</td> | |
<td class="tg-baqh">54.52</td> | |
<td class="tg-baqh">52.67</td> | |
<td class="tg-baqh">73.39</td> | |
<td class="tg-baqh">61.83</td> | |
<td class="tg-baqh">54.52</td> | |
<td class="tg-baqh">62.23</td> | |
<td class="tg-baqh">52.67</td> | |
<td class="tg-baqh">73.39</td> | |
</tr> | |
<tr> | |
<td class="tg-baqh">LID</td> | |
<td class="tg-baqh">71.08</td> | |
<td class="tg-baqh">61.33</td> | |
<td class="tg-baqh">55.56</td> | |
<td class="tg-baqh">66.18</td> | |
<td class="tg-baqh">73.61</td> | |
<td class="tg-baqh">67.98</td> | |
<td class="tg-baqh">55.68</td> | |
<td class="tg-baqh">56.33</td> | |
<td class="tg-baqh">85.94</td> | |
</tr> | |
<tr> | |
<td class="tg-baqh">Hu</td> | |
<td class="tg-baqh">84.51</td> | |
<td class="tg-baqh">58.59</td> | |
<td class="tg-baqh">53.55</td> | |
<td class="tg-2imo">95.82</td> | |
<td class="tg-baqh">84.51</td> | |
<td class="tg-baqh">58.59</td> | |
<td class="tg-2imo">91.02</td> | |
<td class="tg-baqh">53.55</td> | |
<td class="tg-baqh">95.82</td> | |
</tr> | |
<tr> | |
<td class="tg-baqh">Mao</td> | |
<td class="tg-baqh">95.33</td> | |
<td class="tg-2imo">82.61</td> | |
<td class="tg-2imo">81.95</td> | |
<td class="tg-baqh">85.76</td> | |
<td class="tg-baqh">95.33</td> | |
<td class="tg-baqh">82.61</td> | |
<td class="tg-baqh">83.10</td> | |
<td class="tg-baqh">81.95</td> | |
<td class="tg-baqh">85.76</td> | |
</tr> | |
<tr> | |
<td class="tg-baqh">LNG</td> | |
<td class="tg-2imo">98.51 </td> | |
<td class="tg-baqh">63.14 </td> | |
<td class="tg-baqh">58.47 </td> | |
<td class="tg-baqh">94.71 </td> | |
<td class="tg-amwm">99.88 </td> | |
<td class="tg-2imo">91.39 </td> | |
<td class="tg-baqh">89.74 </td> | |
<td class="tg-2imo">84.03 </td> | |
<td class="tg-2imo">98.82 </td> | |
</tr> | |
<tr> | |
<td class="tg-baqh">BEYOND</td> | |
<td class="tg-amwm">98.89</td> | |
<td class="tg-amwm">99.28</td> | |
<td class="tg-amwm">99.16</td> | |
<td class="tg-amwm">99.27</td> | |
<td class="tg-2imo">98.89</td> | |
<td class="tg-amwm">99.28</td> | |
<td class="tg-amwm">99.20</td> | |
<td class="tg-amwm">99.16</td> | |
<td class="tg-amwm">99.27</td> | |
</tr> | |
</tbody> | |
</table>
</div>
</div>
</div>
</section>
<!-- Results -->
<!-- Adaptive Attack -->
<section class="section">
<div class="container is-max-desktop">
<h2 class="title is-3">Adaptive Attack</h2>
<div class="columns is-centered">
<div class="column container-centered">
<div id="adaptive-loss-formula" class="container">
<div id="adaptive-loss-formula-list" class="row align-items-center formula-list">
<a href="#label-loss" class="selected">Label Loss</a>
<a href="#representation-loss">Representation Loss</a>
<a href="#total-loss">Total Loss</a>
<div style="clear: both"></div>
</div>
<div id="adaptive" class="row align-items-center">
<span id="label-loss" class="formula" style=""> | |
$$ | |
\displaystyle | |
\begin{aligned} | |
\phi_\theta(x)&=1-\mathbb{E}_{y \sim T_\theta(x)} JB(y)\\ | |
JB (y) &= \begin{cases} | |
1 \text{, if $y$ contains any jailbreak keyword;} \\ | |
0 \text{, otherwise.} | |
\end{cases} | |
\end{aligned} | |
$$ | |
</span> | |
<span id="representation-loss" class="formula" style="display: none;"> | |
$$ | |
\displaystyle | |
\begin{aligned} | |
f_\theta(x) &=1-\frac{1}{N}\sum_{i=1}^N JB(y_i)\\ | |
JB (y_i) &= \begin{cases} | |
1 \text{, if $y_i$ contains any jailbreak keyword;} \\ | |
0 \text{, otherwise.} | |
\end{cases} | |
\end{aligned} | |
$$ | |
</span> | |
<span id="total-loss" class="formula" style="display: none;"> | |
$$\displaystyle g_\theta(x)=\sum_{i=1}^P \frac{f_\theta(x\oplus \mu u_i)-f_\theta(x)}{\mu} u_i $$ | |
</span> | |
</div>
</div>
</div>
</div>
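<div class="columns is-centered">
<div class="column has-text-justified">
<p>
A sketch of how this combined objective could be computed, under the same hypothetical interfaces as the detection sketch above
(<code>classifier</code>, <code>ssl_repr</code>, <code>ssl_head</code>, <code>augment</code>); the weighting coefficients and all
names are assumptions for illustration, not the paper's released attack code.
</p>
<pre><code>import torch
import torch.nn.functional as F

def adaptive_total_loss(x_adv, y_target, classifier, ssl_repr, ssl_head,
                        augment, k=50, alpha=1.0, beta=1.0):
    """Fool the classifier while keeping neighbors' labels consistent
    and representations similar, so the detector stays quiet."""
    neighbors = torch.stack([augment(x_adv) for _ in range(k)])

    # Classification loss: push the classifier toward the target label.
    cls_loss = F.cross_entropy(classifier(x_adv.unsqueeze(0)),
                               y_target.unsqueeze(0))

    # Label loss: push the neighbors' SSL-head predictions toward
    # the same target label.
    label_loss = F.cross_entropy(ssl_head(ssl_repr(neighbors)),
                                 y_target.repeat(k))

    # Representation loss: negated cosine similarity, so minimizing it
    # keeps the input close to its neighbors in SSL feature space.
    z = ssl_repr(x_adv.unsqueeze(0))
    rep_loss = -F.cosine_similarity(z, ssl_repr(neighbors)).mean()

    return cls_loss + alpha * label_loss + beta * rep_loss
</code></pre>
</div>
</div>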
<div class="columns is-centered"> | |
<div class="column"> | |
<div class="content"> | |
<h2 class="title is-4">Performance against Adaptive Attacks</h2> | |
</div> | |
<div class="column"> | |
<div class="content"> | |
<h2 class="title is-4">Contribution of Representation Similarity & Label Con- | |
sistency against Adaptive Attacks</h2> | |
</div> | |
</div> | |
</div> | |
</div> | |
</section>
<!-- Adaptive Attack -->
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<pre><code>@inproceedings{he2024beyond,
  author    = {He, Zhiyuan and Yang, Yijun and Chen, Pin-Yu and Xu, Qiang and Ho, Tsung-Yi},
  title     = {Be Your Own Neighborhood: Detecting Adversarial Examples by the Neighborhood Relations Built on Self-Supervised Learning},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2024},
}</code></pre>
</div>
</section>
<footer class="footer"> | |
<div class="container"> | |
<div class="content has-text-centered"> | |
<a class="icon-link" target="_blank" | |
href="./static/videos/nerfies_paper.pdf"> | |
<i class="fas fa-file-pdf"></i> | |
</a> | |
<a class="icon-link" href="https://github.com/keunhong" target="_blank" class="external-link" disabled> | |
<i class="fab fa-github"></i> | |
</a> | |
</div> | |
<div class="columns is-centered"> | |
<div class="column is-8"> | |
<div class="content"> | |
<p> | |
This website is licensed under a <a rel="license" target="_blank" | |
href="http://creativecommons.org/licenses/by-sa/4.0/">Creative | |
Commons Attribution-ShareAlike 4.0 International License</a>. | |
</p> | |
<p> | |
This means you are free to borrow the <a target="_blank" | |
href="https://github.com/nerfies/nerfies.github.io">source code</a> of this website, | |
we just ask that you link back to this page in the footer. | |
Please remember to remove the analytics code included in the header of the website which | |
you do not want on your website. | |
</p> | |
</div> | |
</div> | |
</div> | |
</div> | |
</footer> | |
</body> | |
</html> | |