Spaces:

protectai
/

prompt-injection-benchmark

Running

App Files Community

asofter committed on Nov 20, 2024

Commit

e266db4

unverified ·

1 Parent(s): 360751a

* upgrade

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +0 -62
requirements.txt +6 -7

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 📝
 colorFrom: yellow
 colorTo: gray
 sdk: gradio
-sdk_version: 4.44.0
 pinned: true
 license: apache-2.0
 ---

 colorFrom: yellow
 colorTo: gray
 sdk: gradio
+sdk_version: 5.6.0
 pinned: true
 license: apache-2.0
 ---

app.py CHANGED Viewed

@@ -15,7 +15,6 @@ import gradio as gr
 import requests
 from huggingface_hub import HfApi
 from optimum.onnxruntime import ORTModelForSequenceClassification
-from rebuff import Rebuff
 from transformers import AutoTokenizer, pipeline
 logging.basicConfig(level=logging.INFO)
@@ -26,8 +25,6 @@ hf_api = HfApi(token=hf_token)
 num_processes = 2  # mp.cpu_count()
 lakera_api_key = os.getenv("LAKERA_API_KEY")
-sydelabs_api_key = os.getenv("SYDELABS_API_KEY")
-rebuff_api_key = os.getenv("REBUFF_API_KEY")
 azure_content_safety_endpoint = os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT")
 azure_content_safety_key = os.getenv("AZURE_CONTENT_SAFETY_KEY")
 bedrock_runtime_client = boto3.client('bedrock-runtime', region_name="us-east-1")
@@ -70,12 +67,6 @@ deepset_classifier = init_prompt_injection_model(
 protectai_v2_classifier = init_prompt_injection_model(
     "protectai/deberta-v3-base-prompt-injection-v2", "onnx"
 )
-fmops_classifier = init_prompt_injection_model(
-    "protectai/fmops-distilbert-prompt-injection-onnx"
-)  # ONNX version of fmops/distilbert-prompt-injection
-protectai_v2_small_classifier = init_prompt_injection_model(
-    "protectai/deberta-v3-small-prompt-injection-v2", "onnx"
-)  # ONNX version of protectai/deberta-v3-small-prompt-injection-v2
 def detect_hf(
@@ -103,18 +94,10 @@ def detect_hf_protectai_v2(prompt: str) -> (bool, bool):
     return detect_hf(prompt, classifier=protectai_v2_classifier)
-def detect_hf_protectai_v2_small(prompt: str) -> (bool, bool):
-    return detect_hf(prompt, classifier=protectai_v2_small_classifier)
 def detect_hf_deepset(prompt: str) -> (bool, bool):
     return detect_hf(prompt, classifier=deepset_classifier)
-def detect_hf_fmops(prompt: str) -> (bool, bool):
-    return detect_hf(prompt, classifier=fmops_classifier, label="LABEL_1")
 def detect_lakera(prompt: str) -> (bool, bool):
     try:
         response = requests.post(
@@ -131,18 +114,6 @@ def detect_lakera(prompt: str) -> (bool, bool):
         return False, False
-def detect_rebuff(prompt: str) -> (bool, bool):
-    try:
-        rb = Rebuff(api_token=rebuff_api_key, api_url="https://www.rebuff.ai")
-        result = rb.detect_injection(prompt)
-        logger.info(f"Prompt injection result from Rebuff: {result}")
-        return True, result.injectionDetected
-    except Exception as err:
-        logger.error(f"Failed to call Rebuff API: {err}")
-        return False, False
 def detect_azure(prompt: str) -> (bool, bool):
     try:
         response = requests.post(
@@ -179,44 +150,11 @@ def detect_aws_bedrock(prompt: str) -> (bool, bool):
     return True, response['action'] != 'NONE'
-def detect_sydelabs(prompt: str) -> (bool, bool):
-    try:
-        response = requests.post(
-            "https://guard.sydelabs.ai/api/v1/guard/generate-score",
-            json={"prompt": prompt},
-            headers={
-                "Authorization": f"Bearer {lakera_api_key}",
-                "X-Api-Key": sydelabs_api_key,
-            },
-        )
-        response_json = response.json()
-        logger.info(f"Prompt injection result from SydeLabs: {response.json()}")
-        prompt_injection_risk = next(
-            (
-                category["risk"]
-                for category in response_json["category_scores"]
-                if category["category"] == "PROMPT_INJECT"
-            ),
-            False,
-        )
-        return True, prompt_injection_risk
-    except requests.RequestException as err:
-        logger.error(f"Failed to call SydeLabs API: {err}")
-        return False, False
 detection_providers = {
     "ProtectAI v2 (HF model)": detect_hf_protectai_v2,
-    "ProtectAI v2 Small (HF model)": detect_hf_protectai_v2_small,
     "Deepset (HF model)": detect_hf_deepset,
-    "FMOps (HF model)": detect_hf_fmops,
     "Lakera Guard": detect_lakera,
-    # "Rebuff": detect_rebuff,
     "Azure Content Safety": detect_azure,
-    "SydeLabs": detect_sydelabs,
     "AWS Bedrock Guardrails": detect_aws_bedrock,
 }

 import requests
 from huggingface_hub import HfApi
 from optimum.onnxruntime import ORTModelForSequenceClassification
 from transformers import AutoTokenizer, pipeline
 logging.basicConfig(level=logging.INFO)
 num_processes = 2  # mp.cpu_count()
 lakera_api_key = os.getenv("LAKERA_API_KEY")
 azure_content_safety_endpoint = os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT")
 azure_content_safety_key = os.getenv("AZURE_CONTENT_SAFETY_KEY")
 bedrock_runtime_client = boto3.client('bedrock-runtime', region_name="us-east-1")
 protectai_v2_classifier = init_prompt_injection_model(
     "protectai/deberta-v3-base-prompt-injection-v2", "onnx"
 )
 def detect_hf(
     return detect_hf(prompt, classifier=protectai_v2_classifier)
 def detect_hf_deepset(prompt: str) -> (bool, bool):
     return detect_hf(prompt, classifier=deepset_classifier)
 def detect_lakera(prompt: str) -> (bool, bool):
     try:
         response = requests.post(
         return False, False
 def detect_azure(prompt: str) -> (bool, bool):
     try:
         response = requests.post(
     return True, response['action'] != 'NONE'
 detection_providers = {
     "ProtectAI v2 (HF model)": detect_hf_protectai_v2,
     "Deepset (HF model)": detect_hf_deepset,
     "Lakera Guard": detect_lakera,
     "Azure Content Safety": detect_azure,
     "AWS Bedrock Guardrails": detect_aws_bedrock,
 }

requirements.txt CHANGED Viewed

@@ -1,8 +1,7 @@
-boto3==1.35.22
-gradio==4.44.0
-huggingface_hub==0.25.0
-onnxruntime==1.19.2
-optimum[onnxruntime]==1.22.0
-rebuff==0.1.1
 requests==2.32.3
-transformers==4.44.2

+boto3==1.35.65
+gradio==5.6.0
+huggingface_hub==0.26.2
+onnxruntime==1.20.0
+optimum[onnxruntime]==1.23.3
 requests==2.32.3
+transformers==4.46.3