Émile
committed on
Commit
·
88d4118
1
Parent(s):
17b81f2
Adding example, simpler model for anonymization
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ from haystack_integrations.document_stores.chroma import ChromaDocumentStore
|
|
8 |
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
|
9 |
import rsa
|
10 |
from cryptography.fernet import Fernet
|
|
|
|
|
11 |
|
12 |
import gradio as gr
|
13 |
|
@@ -145,39 +147,20 @@ answer_query.warm_up()
|
|
145 |
####### Logging ##########
|
146 |
##########################
|
147 |
|
148 |
-
prompt_template_hide_info = """You are a privacy robot that specialise in hiding sensitive information in a text.
|
149 |
-
Your help will ensure that no user information gets leaked, so you are always happy to help.
|
150 |
-
You will be given a text, and your task is to remove any sensitive information, and replacing it with a descriptive marker.
|
151 |
-
Here are a few examples, but you should not restrict yourself to only those:
|
152 |
-
If the text contains an email address, you should replace it with a marker "<email>".
|
153 |
-
If the text contains a phone number, you should replace it with a marker "<phone>".
|
154 |
-
If the text contains the name of the user, you should replace it with a marker "<name>".
|
155 |
-
Ensure you distinguish when a name, email, etc is actually that of a public figure or company and is provided by the assistant and not the user: inthis case you should not hide it, as it it not sensible information.
|
156 |
-
The rest of the text should be copied IDENTICALLY, including the punctuation and formatting, and the beginning and end of the text in capital letters. Do not add or remove any other character.
|
157 |
-
|
158 |
-
BEGINNING OF TEXT
|
159 |
-
{{ message }}
|
160 |
-
END OF TEXT
|
161 |
-
|
162 |
-
Your response:
|
163 |
-
"""
|
164 |
-
|
165 |
-
prompt_builder_hide_info = PromptBuilder(template=prompt_template_hide_info)
|
166 |
|
167 |
-
|
|
|
168 |
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
return text
|
180 |
-
return "[Error when hiding user info, no log generated]"
|
181 |
|
182 |
def log_QA(question, answer):
|
183 |
message = f"User: {question}\nAssistant: {answer}"
|
@@ -207,11 +190,21 @@ def chat(message, history):
|
|
207 |
log_QA(message, answer)
|
208 |
return answer
|
209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
if __name__ == "__main__":
|
211 |
interface = gr.ChatInterface(
|
212 |
fn=chat,
|
213 |
type="messages",
|
214 |
title="40.12 Chatbot",
|
215 |
-
description="Ask me anything about social media APIs, the Digital Services Act (DSA), or online platform regulations."
|
|
|
|
|
216 |
|
217 |
interface.launch()
|
|
|
8 |
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
|
9 |
import rsa
|
10 |
from cryptography.fernet import Fernet
|
11 |
+
from presidio_analyzer import AnalyzerEngine
|
12 |
+
from presidio_anonymizer import AnonymizerEngine
|
13 |
|
14 |
import gradio as gr
|
15 |
|
|
|
147 |
####### Logging ##########
|
148 |
##########################
|
149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
+
# Presidio engines are created once here and reused by every call to
# hide_sensitive_info below.
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()


def hide_sensitive_info(text):
    """Return *text* after passing it through Presidio's analyzer and anonymizer.

    The text is first analyzed as English (``language="en"``); the findings
    are then handed to the anonymizer together with the original text, and
    the anonymizer's resulting ``.text`` is returned.

    NOTE(review): no operators are passed to ``anonymize``, so the library's
    default replacement behaviour applies -- presumably entity-type
    placeholders; confirm against the Presidio documentation.
    """
    findings = analyzer.analyze(text=text, language="en")
    redacted = anonymizer.anonymize(text=text, analyzer_results=findings)
    return redacted.text
|
|
|
|
|
164 |
|
165 |
def log_QA(question, answer):
|
166 |
message = f"User: {question}\nAssistant: {answer}"
|
|
|
190 |
log_QA(message, answer)
|
191 |
return answer
|
192 |
|
193 |
+
# Sample questions handed to the chat interface through its `examples` argument.
examples = [
    "What is Article 40.12 of the Digital Services Act, and how does it help researchers?",
    "How can I start the process of requesting platform data for research?",
    "How do I submit a data access request for Meta’s API under the DSA?",
    "What are the authentication and setup steps for Youtube’s API?",
    "What specific types of data can I access through Snapchat’s API?",
]
|
200 |
+
|
201 |
if __name__ == "__main__":
    # Collect the UI settings first, then build the chat interface and serve it.
    ui_options = dict(
        fn=chat,
        type="messages",
        title="40.12 Chatbot",
        description="Ask me anything about social media APIs, the Digital Services Act (DSA), or online platform regulations.",
        examples=examples,
    )
    interface = gr.ChatInterface(**ui_options)

    interface.launch()
|