asofter committed on
Commit e18c8b0
1 Parent(s): 54917d7
Files changed (9)
  1. .gitignore +1 -0
  2. Dockerfile +32 -0
  3. README.md +31 -4
  4. app.py +139 -0
  5. output.py +381 -0
  6. output_text.txt +20 -0
  7. prompt.py +338 -0
  8. prompt_text.txt +15 -0
  9. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ venv
Dockerfile ADDED
@@ -0,0 +1,32 @@
+ FROM python:3.10-slim
+
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     curl \
+     software-properties-common \
+     && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+
+ COPY ./requirements.txt /app/requirements.txt
+
+ RUN pip3 install --upgrade pip
+ RUN pip3 install -r requirements.txt
+ RUN python3 -m spacy download en_core_web_trf
+
+ EXPOSE 7860
+
+ COPY . /app
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
+
+ CMD python -m streamlit run app.py --server.port=7860 --server.address=0.0.0.0
README.md CHANGED
@@ -1,11 +1,38 @@
  ---
- title: Llm Guard Demo
+ title: LLM Guard Demo
  emoji: 🏢
- colorFrom: red
- colorTo: red
+ colorFrom: blue
+ colorTo: gray
  sdk: docker
  pinned: false
  license: mit
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Simple demo website for LLM Guard
+
+ Here's a simple app, written in pure Python, that serves as a demo website for LLM Guard.
+ The app is based on the [streamlit](https://streamlit.io/) package.
+
+ A live version can be found here: https://huggingface.co/spaces/laiyer/llm-guard-demo
+
+ ## Requirements
+
+ 1. Clone the repo and move to the `examples/demo` folder
+
+ 2. Install dependencies (preferably in a virtual environment)
+
+ ```sh
+ pip install -r requirements.txt
+ ```
+
+ 3. Start the app:
+
+ ```sh
+ streamlit run app.py
+ ```
+
+ ## Output
+
+ Output should be similar to this screenshot:
+ ![image](./screenshot.png)
+
app.py ADDED
@@ -0,0 +1,139 @@
+ import logging
+ import time
+ import traceback
+ from datetime import timedelta
+
+ import pandas as pd
+ import spacy
+ import streamlit as st
+ from output import init_settings as init_output_settings
+ from output import scan as scan_output
+ from prompt import init_settings as init_prompt_settings
+ from prompt import scan as scan_prompt
+
+ from llm_guard.vault import Vault
+
+ if not spacy.util.is_package("en_core_web_trf"):
+     spacy.cli.download("en_core_web_trf")
+
+ PROMPT = "prompt"
+ OUTPUT = "output"
+ vault = Vault()
+
+ st.set_page_config(
+     page_title="LLM Guard demo",
+     layout="wide",
+     initial_sidebar_state="expanded",
+     menu_items={
+         "About": "https://laiyer-ai.github.io/llm-guard/",
+     },
+ )
+
+ logger = logging.getLogger("llm-guard-demo")
+ logger.setLevel(logging.INFO)
+
+ # Sidebar
+ st.sidebar.header(
+     """
+ Scanning prompt and output using [LLM Guard](https://laiyer-ai.github.io/llm-guard/)
+ """
+ )
+
+ scanner_type = st.sidebar.selectbox("Type", [PROMPT, OUTPUT], index=0)
+
+ enabled_scanners = None
+ settings = None
+ if scanner_type == PROMPT:
+     enabled_scanners, settings = init_prompt_settings()
+ elif scanner_type == OUTPUT:
+     enabled_scanners, settings = init_output_settings()
+
+ # Main panel
+ with st.expander("About this demo", expanded=False):
+     st.info(
+         """LLM-Guard is a comprehensive tool designed to fortify the security of Large Language Models (LLMs).
+         \n\n[Code](https://github.com/laiyer-ai/llm-guard) |
+         [Documentation](https://laiyer-ai.github.io/llm-guard/)"""
+     )
+
+     st.markdown(
+         "[![Pypi Downloads](https://img.shields.io/pypi/dm/llm-guard.svg)](https://img.shields.io/pypi/dm/llm-guard.svg)"  # noqa
+         "[![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT)"
+         "![GitHub Repo stars](https://img.shields.io/github/stars/laiyer-ai/llm-guard?style=social)"
+     )
+
+ analyzer_load_state = st.info("Starting LLM Guard...")
+
+ analyzer_load_state.empty()
+
+ # Read default text
+ with open("prompt_text.txt") as f:
+     demo_prompt_text = f.readlines()
+
+ with open("output_text.txt") as f:
+     demo_output_text = f.readlines()
+
+ # Before:
+ st.subheader("Guard Prompt" if scanner_type == PROMPT else "Guard Output")
+
+ if scanner_type == PROMPT:
+     st_prompt_text = st.text_area(
+         label="Enter prompt", value="".join(demo_prompt_text), height=200, key="prompt_text_input"
+     )
+ elif scanner_type == OUTPUT:
+     col1, col2 = st.columns(2)
+     st_prompt_text = col1.text_area(
+         label="Enter prompt", value="".join(demo_prompt_text), height=300, key="prompt_text_input"
+     )
+
+     st_output_text = col2.text_area(
+         label="Enter output", value="".join(demo_output_text), height=300, key="output_text_input"
+     )
+
+ st_result_text = None
+ st_analysis = None
+ st_is_valid = None
+ st_time_delta = None
+
+ try:
+     with st.form("text_form", clear_on_submit=False):
+         submitted = st.form_submit_button("Process")
+         if submitted:
+             results_valid = {}
+             results_score = {}
+
+             start_time = time.monotonic()
+             if scanner_type == PROMPT:
+                 st_result_text, results_valid, results_score = scan_prompt(
+                     vault, enabled_scanners, settings, st_prompt_text
+                 )
+             elif scanner_type == OUTPUT:
+                 st_result_text, results_valid, results_score = scan_output(
+                     vault, enabled_scanners, settings, st_prompt_text, st_output_text
+                 )
+             end_time = time.monotonic()
+             st_time_delta = timedelta(seconds=end_time - start_time)
+
+             st_is_valid = all(results_valid.values())
+             st_analysis = [
+                 {"scanner": k, "is valid": results_valid[k], "risk score": results_score[k]}
+                 for k in results_valid
+             ]
+
+ except Exception as e:
+     logger.error(e)
+     traceback.print_exc()
+     st.error(e)
+
+ # After:
+ if st_is_valid is not None:
+     execution_time_ms = round(st_time_delta.total_seconds() * 1000)
+     st.subheader(f"Results - {'valid' if st_is_valid else 'invalid'} ({execution_time_ms} ms)")
+
+     col1, col2 = st.columns(2)
+
+     with col1:
+         st.text_area(label="Sanitized text", value=st_result_text, height=400)
+
+     with col2:
+         st.table(pd.DataFrame(st_analysis))
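Note: `scan_prompt` and `scan_output` simply run each enabled scanner in sequence, threading the sanitized text through. Below is a minimal headless sketch of the same loop, assuming `llm-guard==0.1.3` as pinned in `requirements.txt`; the scanner choices and input string are illustrative, and the constructors and `(text, is_valid, risk_score)` return shape mirror `prompt.py` further down.

```python
# Minimal sketch: the app.py scan loop, without the Streamlit UI.
from llm_guard.input_scanners import BanSubstrings, Toxicity

scanners = [
    BanSubstrings(substrings=["hello", "world"], match_type="str", case_sensitive=False),
    Toxicity(threshold=0.75),
]

sanitized = "Hello, please summarize this resume."  # hypothetical prompt
for scanner in scanners:
    # Each scanner rewrites the text and reports validity plus a risk score.
    sanitized, is_valid, risk_score = scanner.scan(sanitized)
    print(f"{scanner.__class__.__name__}: valid={is_valid}, risk={risk_score}")
```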
output.py ADDED
@@ -0,0 +1,381 @@
+ import logging
+ from typing import Dict, List
+
+ import streamlit as st
+ from streamlit_tags import st_tags
+
+ from llm_guard.input_scanners.anonymize import default_entity_types
+ from llm_guard.output_scanners import (
+     BanSubstrings,
+     BanTopics,
+     Bias,
+     Code,
+     Deanonymize,
+     MaliciousURLs,
+     NoRefusal,
+     Refutation,
+     Regex,
+     Relevance,
+     Sensitive,
+ )
+ from llm_guard.output_scanners.sentiment import Sentiment
+ from llm_guard.output_scanners.toxicity import Toxicity
+ from llm_guard.vault import Vault
+
+ logger = logging.getLogger("llm-guard-demo")
+
+
+ def init_settings() -> (List, Dict):
+     all_scanners = [
+         "BanSubstrings",
+         "BanTopics",
+         "Bias",
+         "Code",
+         "Deanonymize",
+         "MaliciousURLs",
+         "NoRefusal",
+         "Refutation",
+         "Regex",
+         "Relevance",
+         "Sensitive",
+         "Sentiment",
+         "Toxicity",
+     ]
+
+     st_enabled_scanners = st.sidebar.multiselect(
+         "Select scanners",
+         options=all_scanners,
+         default=all_scanners,
+         help="The list can be found here: https://laiyer-ai.github.io/llm-guard/output_scanners/bias/",
+     )
+
+     settings = {}
+
+     if "BanSubstrings" in st_enabled_scanners:
+         st_bs_expander = st.sidebar.expander(
+             "Ban Substrings",
+             expanded=False,
+         )
+
+         with st_bs_expander:
+             st_bs_substrings = st.text_area(
+                 "Enter substrings to ban (one per line)",
+                 value="test\nhello\nworld\n",
+                 height=200,
+             ).split("\n")
+
+             st_bs_match_type = st.selectbox("Match type", ["str", "word"])
+             st_bs_case_sensitive = st.checkbox("Case sensitive", value=False)
+
+             settings["BanSubstrings"] = {
+                 "substrings": st_bs_substrings,
+                 "match_type": st_bs_match_type,
+                 "case_sensitive": st_bs_case_sensitive,
+             }
+
+     if "BanTopics" in st_enabled_scanners:
+         st_bt_expander = st.sidebar.expander(
+             "Ban Topics",
+             expanded=False,
+         )
+
+         with st_bt_expander:
+             st_bt_topics = st_tags(
+                 label="List of topics",
+                 text="Type and press enter",
+                 value=["politics", "religion", "money", "crime"],
+                 suggestions=[],
+                 maxtags=30,
+                 key="bt_topics",
+             )
+
+             st_bt_threshold = st.slider(
+                 label="Threshold",
+                 value=0.75,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="ban_topics_threshold",
+             )
+
+             settings["BanTopics"] = {"topics": st_bt_topics, "threshold": st_bt_threshold}
+
+     if "Bias" in st_enabled_scanners:
+         st_bias_expander = st.sidebar.expander(
+             "Bias",
+             expanded=False,
+         )
+
+         with st_bias_expander:
+             st_bias_threshold = st.slider(
+                 label="Threshold",
+                 value=0.75,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="bias_threshold",
+             )
+
+             settings["Bias"] = {"threshold": st_bias_threshold}
+
+     if "Code" in st_enabled_scanners:
+         st_cd_expander = st.sidebar.expander(
+             "Code",
+             expanded=False,
+         )
+
+         with st_cd_expander:
+             st_cd_languages = st.multiselect(
+                 "Programming languages",
+                 options=["python", "java", "javascript", "go", "php", "ruby"],
+                 default=["python"],
+             )
+
+             st_cd_mode = st.selectbox("Mode", ["allowed", "denied"], index=0)
+
+             settings["Code"] = {"languages": st_cd_languages, "mode": st_cd_mode}
+
+     if "MaliciousURLs" in st_enabled_scanners:
+         st_murls_expander = st.sidebar.expander(
+             "Malicious URLs",
+             expanded=False,
+         )
+
+         with st_murls_expander:
+             st_murls_threshold = st.slider(
+                 label="Threshold",
+                 value=0.75,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="murls_threshold",
+             )
+
+             settings["MaliciousURLs"] = {"threshold": st_murls_threshold}
+
+     if "NoRefusal" in st_enabled_scanners:
+         st_no_ref_expander = st.sidebar.expander(
+             "No refusal",
+             expanded=False,
+         )
+
+         with st_no_ref_expander:
+             st_no_ref_threshold = st.slider(
+                 label="Threshold",
+                 value=0.5,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="no_ref_threshold",
+             )
+
+             settings["NoRefusal"] = {"threshold": st_no_ref_threshold}
+
+     if "Refutation" in st_enabled_scanners:
+         st_refu_expander = st.sidebar.expander(
+             "Refutation",
+             expanded=False,
+         )
+
+         with st_refu_expander:
+             st_refu_threshold = st.slider(
+                 label="Threshold",
+                 value=0.5,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="refu_threshold",
+             )
+
+             settings["Refutation"] = {"threshold": st_refu_threshold}
+
+     if "Regex" in st_enabled_scanners:
+         st_regex_expander = st.sidebar.expander(
+             "Regex",
+             expanded=False,
+         )
+
+         with st_regex_expander:
+             st_regex_patterns = st.text_area(
+                 "Enter patterns to ban (one per line)",
+                 value="Bearer [A-Za-z0-9-._~+/]+",
+                 height=200,
+             ).split("\n")
+
+             st_regex_type = st.selectbox(
+                 "Match type",
+                 ["good", "bad"],
+                 index=1,
+                 help="good: allow only good patterns, bad: ban bad patterns",
+             )
+
+             settings["Regex"] = {"patterns": st_regex_patterns, "type": st_regex_type}
+
+     if "Relevance" in st_enabled_scanners:
+         st_rele_expander = st.sidebar.expander(
+             "Relevance",
+             expanded=False,
+         )
+
+         with st_rele_expander:
+             st_rele_threshold = st.slider(
+                 label="Threshold",
+                 value=0.5,
+                 min_value=-1.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="rele_threshold",
+                 help="The minimum cosine similarity (-1 to 1) between the prompt and output for the output to be considered relevant.",
+             )
+
+             settings["Relevance"] = {"threshold": st_rele_threshold}
+
+     if "Sensitive" in st_enabled_scanners:
+         st_sens_expander = st.sidebar.expander(
+             "Sensitive",
+             expanded=False,
+         )
+
+         with st_sens_expander:
+             st_sens_entity_types = st_tags(
+                 label="Sensitive entities",
+                 text="Type and press enter",
+                 value=default_entity_types,
+                 suggestions=default_entity_types
+                 + ["DATE_TIME", "NRP", "LOCATION", "MEDICAL_LICENSE", "US_PASSPORT"],
+                 maxtags=30,
+                 key="sensitive_entity_types",
+             )
+             st.caption(
+                 "Check all supported entities: https://microsoft.github.io/presidio/supported_entities/#list-of-supported-entities"
+             )
+
+             settings["Sensitive"] = {"entity_types": st_sens_entity_types}
+
+     if "Sentiment" in st_enabled_scanners:
+         st_sent_expander = st.sidebar.expander(
+             "Sentiment",
+             expanded=False,
+         )
+
+         with st_sent_expander:
+             st_sent_threshold = st.slider(
+                 label="Threshold",
+                 value=-0.1,
+                 min_value=-1.0,
+                 max_value=1.0,
+                 step=0.1,
+                 key="sentiment_threshold",
+                 help="Negative values are negative sentiment, positive values are positive sentiment",
+             )
+
+             settings["Sentiment"] = {"threshold": st_sent_threshold}
+
+     if "Toxicity" in st_enabled_scanners:
+         st_tox_expander = st.sidebar.expander(
+             "Toxicity",
+             expanded=False,
+         )
+
+         with st_tox_expander:
+             st_tox_threshold = st.slider(
+                 label="Threshold",
+                 value=0.0,
+                 min_value=-1.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="toxicity_threshold",
+                 help="A negative value (closer to 0 as the label output) indicates toxicity in the text, while a positive logit (closer to 1 as the label output) suggests non-toxicity.",
+             )
+
+             settings["Toxicity"] = {"threshold": st_tox_threshold}
+
+     return st_enabled_scanners, settings
+
+
+ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
+     logger.debug(f"Initializing {scanner_name} scanner")
+
+     if scanner_name == "BanSubstrings":
+         return BanSubstrings(
+             substrings=settings["substrings"],
+             match_type=settings["match_type"],
+             case_sensitive=settings["case_sensitive"],
+         )
+
+     if scanner_name == "BanTopics":
+         return BanTopics(topics=settings["topics"], threshold=settings["threshold"])
+
+     if scanner_name == "Bias":
+         return Bias(threshold=settings["threshold"])
+
+     if scanner_name == "Deanonymize":
+         return Deanonymize(vault=vault)
+
+     if scanner_name == "Code":
+         mode = settings["mode"]
+
+         allowed_languages = None
+         denied_languages = None
+         if mode == "allowed":
+             allowed_languages = settings["languages"]
+         elif mode == "denied":
+             denied_languages = settings["languages"]
+
+         return Code(allowed=allowed_languages, denied=denied_languages)
+
+     if scanner_name == "MaliciousURLs":
+         return MaliciousURLs(threshold=settings["threshold"])
+
+     if scanner_name == "NoRefusal":
+         return NoRefusal(threshold=settings["threshold"])
+
+     if scanner_name == "Refutation":
+         return Refutation(threshold=settings["threshold"])
+
+     if scanner_name == "Regex":
+         match_type = settings["type"]
+
+         good_patterns = None
+         bad_patterns = None
+         if match_type == "good":
+             good_patterns = settings["patterns"]
+         elif match_type == "bad":
+             bad_patterns = settings["patterns"]
+
+         return Regex(good_patterns=good_patterns, bad_patterns=bad_patterns)
+
+     if scanner_name == "Relevance":
+         return Relevance(threshold=settings["threshold"])
+
+     if scanner_name == "Sensitive":
+         return Sensitive(entity_types=settings["entity_types"])
+
+     if scanner_name == "Sentiment":
+         return Sentiment(threshold=settings["threshold"])
+
+     if scanner_name == "Toxicity":
+         return Toxicity(threshold=settings["threshold"])
+
+     raise ValueError("Unknown scanner name")
+
+
+ def scan(
+     vault: Vault, enabled_scanners: List[str], settings: Dict, prompt: str, text: str
+ ) -> (str, Dict[str, bool], Dict[str, float]):
+     sanitized_output = text
+     results_valid = {}
+     results_score = {}
+
+     with st.status("Scanning output...", expanded=True) as status:
+         for scanner_name in enabled_scanners:
+             st.write(f"{scanner_name} scanner...")
+             scanner = get_scanner(
+                 scanner_name, vault, settings[scanner_name] if scanner_name in settings else {}
+             )
+             sanitized_output, is_valid, risk_score = scanner.scan(prompt, sanitized_output)
+             results_valid[scanner_name] = is_valid
+             results_score[scanner_name] = risk_score
+         status.update(label="Scanning complete", state="complete", expanded=False)
+
+     return sanitized_output, results_valid, results_score
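Note: output scanners differ from prompt scanners in one way worth highlighting: `scan` receives both the original prompt and the model output, since scanners such as `Relevance` compare the two. A minimal sketch of the same chaining done by `scan()` above, assuming `llm-guard==0.1.3`; the prompt/output strings are hypothetical and the constructor arguments mirror `get_scanner()`.

```python
# Minimal sketch: chain output scanners the way output.py's scan() does.
from llm_guard.output_scanners import Bias, NoRefusal

scanners = [Bias(threshold=0.75), NoRefusal(threshold=0.5)]

prompt = "Write a short, neutral product description."    # hypothetical
sanitized = "Sure! Here is a short product description."  # hypothetical LLM output

for scanner in scanners:
    # Output scanners see the prompt plus the (possibly already sanitized)
    # output, and return (sanitized_output, is_valid, risk_score).
    sanitized, is_valid, risk_score = scanner.scan(prompt, sanitized)
    print(f"{scanner.__class__.__name__}: valid={is_valid}, risk={risk_score}")
```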
output_text.txt ADDED
@@ -0,0 +1,20 @@
+ Alright, given the tables have already been created, here's how you can construct an SQL INSERT query to insert a customer record, ensuring the company is stored in a different table and that the location is one of the allowed ones:
+
+ ```sql
+ -- Insert Company or get its ID
+ INSERT INTO Companies (company_name)
+ VALUES ('Laiyer')
+ ON DUPLICATE KEY UPDATE company_id = LAST_INSERT_ID(company_id);
+
+ -- Insert Customer with given values and ensure the location is allowed
+ INSERT INTO Customers (first_name, IP, company_id, location_id)
+ SELECT 'John Doe', '127.0.0.1', LAST_INSERT_ID(), location_id
+ FROM AllowedLocations
+ WHERE location_name = 'New York, US';
+ ```
+
+ Note:
+
+ 1. LAST_INSERT_ID() fetches the ID of the last inserted row, or the existing company's ID thanks to the ON DUPLICATE KEY UPDATE clause.
+ 2. This approach relies on the assumption that the company_name column in the Companies table has a unique constraint to avoid duplicate entries.
+ 3. The customer insert uses a SELECT statement from the AllowedLocations table to ensure that only valid/allowed locations are used for inserting the customer. If the location doesn't exist in the AllowedLocations table, then no insert will occur for the customer.
prompt.py ADDED
@@ -0,0 +1,338 @@
+ import logging
+ from typing import Dict, List
+
+ import streamlit as st
+ from streamlit_tags import st_tags
+
+ from llm_guard.input_scanners import (
+     Anonymize,
+     BanSubstrings,
+     BanTopics,
+     Code,
+     PromptInjection,
+     Secrets,
+     Sentiment,
+     TokenLimit,
+     Toxicity,
+ )
+ from llm_guard.input_scanners.anonymize import default_entity_types
+ from llm_guard.vault import Vault
+
+ logger = logging.getLogger("llm-guard-demo")
+
+
+ def init_settings() -> (List, Dict):
+     all_scanners = [
+         "Anonymize",
+         "BanSubstrings",
+         "BanTopics",
+         "Code",
+         "PromptInjection",
+         "Secrets",
+         "Sentiment",
+         "TokenLimit",
+         "Toxicity",
+     ]
+
+     st_enabled_scanners = st.sidebar.multiselect(
+         "Select scanners",
+         options=all_scanners,
+         default=all_scanners,
+         help="The list can be found here: https://laiyer-ai.github.io/llm-guard/input_scanners/anonymize/",
+     )
+
+     settings = {}
+
+     if "Anonymize" in st_enabled_scanners:
+         st_anon_expander = st.sidebar.expander(
+             "Anonymize",
+             expanded=False,
+         )
+
+         with st_anon_expander:
+             st_anon_entity_types = st_tags(
+                 label="Anonymize entities",
+                 text="Type and press enter",
+                 value=default_entity_types,
+                 suggestions=default_entity_types
+                 + ["DATE_TIME", "NRP", "LOCATION", "MEDICAL_LICENSE", "US_PASSPORT"],
+                 maxtags=30,
+                 key="anon_entity_types",
+             )
+             st.caption(
+                 "Check all supported entities: https://microsoft.github.io/presidio/supported_entities/#list-of-supported-entities"
+             )
+             st_anon_hidden_names = st_tags(
+                 label="Hidden names to be anonymized",
+                 text="Type and press enter",
+                 value=[],
+                 suggestions=[],
+                 maxtags=30,
+                 key="anon_hidden_names",
+             )
+             st.caption("These names will be hidden e.g. [REDACTED_CUSTOM1].")
+             st_anon_allowed_names = st_tags(
+                 label="Allowed names to ignore",
+                 text="Type and press enter",
+                 value=[],
+                 suggestions=[],
+                 maxtags=30,
+                 key="anon_allowed_names",
+             )
+             st.caption("These names will be ignored even if flagged by the detector.")
+             st_anon_preamble = st.text_input(
+                 "Preamble", value="Text to prepend to sanitized prompt: "
+             )
+             st_anon_use_faker = st.checkbox(
+                 "Use Faker", value=False, help="Use Faker library to generate fake data"
+             )
+
+             settings["Anonymize"] = {
+                 "entity_types": st_anon_entity_types,
+                 "hidden_names": st_anon_hidden_names,
+                 "allowed_names": st_anon_allowed_names,
+                 "preamble": st_anon_preamble,
+                 "use_faker": st_anon_use_faker,
+             }
+
+     if "BanSubstrings" in st_enabled_scanners:
+         st_bs_expander = st.sidebar.expander(
+             "Ban Substrings",
+             expanded=False,
+         )
+
+         with st_bs_expander:
+             st_bs_substrings = st.text_area(
+                 "Enter substrings to ban (one per line)",
+                 value="test\nhello\nworld",
+                 height=200,
+             ).split("\n")
+
+             st_bs_match_type = st.selectbox("Match type", ["str", "word"])
+             st_bs_case_sensitive = st.checkbox("Case sensitive", value=False)
+
+             settings["BanSubstrings"] = {
+                 "substrings": st_bs_substrings,
+                 "match_type": st_bs_match_type,
+                 "case_sensitive": st_bs_case_sensitive,
+             }
+
+     if "BanTopics" in st_enabled_scanners:
+         st_bt_expander = st.sidebar.expander(
+             "Ban Topics",
+             expanded=False,
+         )
+
+         with st_bt_expander:
+             st_bt_topics = st_tags(
+                 label="List of topics",
+                 text="Type and press enter",
+                 value=["politics", "religion", "money", "crime"],
+                 suggestions=[],
+                 maxtags=30,
+                 key="bt_topics",
+             )
+
+             st_bt_threshold = st.slider(
+                 label="Threshold",
+                 value=0.75,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="ban_topics_threshold",
+             )
+
+             settings["BanTopics"] = {
+                 "topics": st_bt_topics,
+                 "threshold": st_bt_threshold,
+             }
+
+     if "Code" in st_enabled_scanners:
+         st_cd_expander = st.sidebar.expander(
+             "Code",
+             expanded=False,
+         )
+
+         with st_cd_expander:
+             st_cd_languages = st.multiselect(
+                 "Programming languages",
+                 ["python", "java", "javascript", "go", "php", "ruby"],
+                 default=["python"],
+             )
+
+             st_cd_mode = st.selectbox("Mode", ["allowed", "denied"], index=0)
+
+             settings["Code"] = {
+                 "languages": st_cd_languages,
+                 "mode": st_cd_mode,
+             }
+
+     if "PromptInjection" in st_enabled_scanners:
+         st_pi_expander = st.sidebar.expander(
+             "Prompt Injection",
+             expanded=False,
+         )
+
+         with st_pi_expander:
+             st_pi_threshold = st.slider(
+                 label="Threshold",
+                 value=0.75,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="prompt_injection_threshold",
+             )
+
+             settings["PromptInjection"] = {
+                 "threshold": st_pi_threshold,
+             }
+
+     if "Secrets" in st_enabled_scanners:
+         st_sec_expander = st.sidebar.expander(
+             "Secrets",
+             expanded=False,
+         )
+
+         with st_sec_expander:
+             st_sec_redact_mode = st.selectbox("Redact mode", ["all", "partial", "hash"])
+
+             settings["Secrets"] = {
+                 "redact_mode": st_sec_redact_mode,
+             }
+
+     if "Sentiment" in st_enabled_scanners:
+         st_sent_expander = st.sidebar.expander(
+             "Sentiment",
+             expanded=False,
+         )
+
+         with st_sent_expander:
+             st_sent_threshold = st.slider(
+                 label="Threshold",
+                 value=-0.1,
+                 min_value=-1.0,
+                 max_value=1.0,
+                 step=0.1,
+                 key="sentiment_threshold",
+                 help="Negative values are negative sentiment, positive values are positive sentiment",
+             )
+
+             settings["Sentiment"] = {
+                 "threshold": st_sent_threshold,
+             }
+
+     if "TokenLimit" in st_enabled_scanners:
+         st_tl_expander = st.sidebar.expander(
+             "Token Limit",
+             expanded=False,
+         )
+
+         with st_tl_expander:
+             st_tl_limit = st.number_input(
+                 "Limit", value=4096, min_value=0, max_value=10000, step=10
+             )
+             st_tl_encoding_name = st.selectbox(
+                 "Encoding name",
+                 ["cl100k_base", "p50k_base", "r50k_base"],
+                 index=0,
+                 help="Read more: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
+             )
+
+             settings["TokenLimit"] = {
+                 "limit": st_tl_limit,
+                 "encoding_name": st_tl_encoding_name,
+             }
+
+     if "Toxicity" in st_enabled_scanners:
+         st_tox_expander = st.sidebar.expander(
+             "Toxicity",
+             expanded=False,
+         )
+
+         with st_tox_expander:
+             st_tox_threshold = st.slider(
+                 label="Threshold",
+                 value=0.75,
+                 min_value=0.0,
+                 max_value=1.0,
+                 step=0.05,
+                 key="toxicity_threshold",
+             )
+
+             settings["Toxicity"] = {
+                 "threshold": st_tox_threshold,
+             }
+
+     return st_enabled_scanners, settings
+
+
+ def get_scanner(scanner_name: str, vault: Vault, settings: Dict):
+     logger.debug(f"Initializing {scanner_name} scanner")
+
+     if scanner_name == "Anonymize":
+         return Anonymize(
+             vault=vault,
+             allowed_names=settings["allowed_names"],
+             hidden_names=settings["hidden_names"],
+             entity_types=settings["entity_types"],
+             preamble=settings["preamble"],
+             use_faker=settings["use_faker"],
+         )
+
+     if scanner_name == "BanSubstrings":
+         return BanSubstrings(
+             substrings=settings["substrings"],
+             match_type=settings["match_type"],
+             case_sensitive=settings["case_sensitive"],
+         )
+
+     if scanner_name == "BanTopics":
+         return BanTopics(topics=settings["topics"], threshold=settings["threshold"])
+
+     if scanner_name == "Code":
+         mode = settings["mode"]
+
+         allowed_languages = None
+         denied_languages = None
+         if mode == "allowed":
+             allowed_languages = settings["languages"]
+         elif mode == "denied":
+             denied_languages = settings["languages"]
+
+         return Code(allowed=allowed_languages, denied=denied_languages)
+
+     if scanner_name == "PromptInjection":
+         return PromptInjection(threshold=settings["threshold"])
+
+     if scanner_name == "Secrets":
+         return Secrets(redact_mode=settings["redact_mode"])
+
+     if scanner_name == "Sentiment":
+         return Sentiment(threshold=settings["threshold"])
+
+     if scanner_name == "TokenLimit":
+         return TokenLimit(limit=settings["limit"], encoding_name=settings["encoding_name"])
+
+     if scanner_name == "Toxicity":
+         return Toxicity(threshold=settings["threshold"])
+
+     raise ValueError("Unknown scanner name")
+
+
+ def scan(
+     vault: Vault, enabled_scanners: List[str], settings: Dict, text: str
+ ) -> (str, Dict[str, bool], Dict[str, float]):
+     sanitized_prompt = text
+     results_valid = {}
+     results_score = {}
+
+     with st.status("Scanning prompt...", expanded=True) as status:
+         for scanner_name in enabled_scanners:
+             st.write(f"{scanner_name} scanner...")
+             scanner = get_scanner(scanner_name, vault, settings[scanner_name])
+             sanitized_prompt, is_valid, risk_score = scanner.scan(sanitized_prompt)
+             results_valid[scanner_name] = is_valid
+             results_score[scanner_name] = risk_score
+         status.update(label="Scanning complete", state="complete", expanded=False)
+
+     return sanitized_prompt, results_valid, results_score
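Note: the shared `Vault` is the link between this module and output.py: `Anonymize` stashes the real values it redacts from the prompt, and `Deanonymize` restores them in the model output, which is why app.py creates a single `vault = Vault()`. A minimal sketch of that round trip, assuming `llm-guard==0.1.3` and using the same keyword arguments as `get_scanner()` (the input string and argument values are illustrative):

```python
# Minimal sketch of the Anonymize/Deanonymize round trip through a shared Vault.
from llm_guard.input_scanners import Anonymize
from llm_guard.input_scanners.anonymize import default_entity_types
from llm_guard.output_scanners import Deanonymize
from llm_guard.vault import Vault

vault = Vault()  # one instance shared across input and output, as in app.py

anonymize = Anonymize(
    vault=vault,
    allowed_names=[],
    hidden_names=[],
    entity_types=default_entity_types,
    preamble="",
    use_faker=False,
)
sanitized_prompt, _, _ = anonymize.scan("Email john.doe@example.com a summary.")

# ...call the LLM with sanitized_prompt; a hypothetical echo stands in here...
model_output = sanitized_prompt

# Deanonymize swaps the placeholders back to the values stored in the vault.
restored, is_valid, risk_score = Deanonymize(vault=vault).scan(sanitized_prompt, model_output)
```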
prompt_text.txt ADDED
@@ -0,0 +1,15 @@
+ Given the personal profile of Johnathan Edward Doe:
+
+ Full Name: Johnathan Edward Doe
+ Date of Birth: April 12, 1990
+ Address: 123 Elm Street, Springfield, IL, 62701
+ Email: john.doe@example.com
+ Phone Number: (123) 456-7890
+ Educational Background:
+ Springfield High School, Graduated in 2008
+ Springfield University, B.Sc. Computer Science, Graduated in 2012
+ Employment:
+ ABC Tech Solutions, Software Engineer, 2012-2015
+ XYZ Global Systems, Senior Software Developer, 2015-2021
+
+ Please create a summarized version of his resume.
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ llm-guard==0.1.3
+ pandas==2.1.0
+ streamlit==1.26.0
+ streamlit-tags==1.2.8