Émile committed on
Commit
88d4118
·
1 Parent(s): 17b81f2

Adding example, simpler model for anonymization

Browse files
Files changed (1) hide show
  1. app.py +25 -32
app.py CHANGED
@@ -8,6 +8,8 @@ from haystack_integrations.document_stores.chroma import ChromaDocumentStore
8
  from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
9
  import rsa
10
  from cryptography.fernet import Fernet
 
 
11
 
12
  import gradio as gr
13
 
@@ -145,39 +147,20 @@ answer_query.warm_up()
145
  ####### Logging ##########
146
  ##########################
147
 
148
- prompt_template_hide_info = """You are a privacy robot that specialise in hiding sensitive information in a text.
149
- Your help will ensure that no user information gets leaked, so you are always happy to help.
150
- You will be given a text, and your task is to remove any sensitive information, and replacing it with a descriptive marker.
151
- Here are a few examples, but you should not restrict yourself to only those:
152
- If the text contains an email address, you should replace it with a marker "<email>".
153
- If the text contains a phone number, you should replace it with a marker "<phone>".
154
- If the text contains the name of the user, you should replace it with a marker "<name>".
155
- Ensure you distinguish when a name, email, etc is actually that of a public figure or company and is provided by the assistant and not the user: inthis case you should not hide it, as it it not sensible information.
156
- The rest of the text should be copied IDENTICALLY, including the punctuation and formatting, and the beginning and end of the text in capital letters. Do not add or remove any other character.
157
-
158
- BEGINNING OF TEXT
159
- {{ message }}
160
- END OF TEXT
161
-
162
- Your response:
163
- """
164
-
165
- prompt_builder_hide_info = PromptBuilder(template=prompt_template_hide_info)
166
 
167
- llm_hide_info = setup_generator("gpt-4o-mini")
 
168
 
169
- pipe_hide_sensitive_info = Pipeline()
170
- pipe_hide_sensitive_info.add_component("prompt_builder_hide_info", prompt_builder_hide_info)
171
- pipe_hide_sensitive_info.add_component("llm_hide_info", llm_hide_info)
172
- pipe_hide_sensitive_info.connect("prompt_builder_hide_info", "llm_hide_info")
173
-
174
- def hide_sensitive_info(message):
175
- for tries in range(3):
176
- answer = pipe_hide_sensitive_info.run({"message": message})["llm_hide_info"]["replies"][0]
177
- if "BEGINNING OF TEXT" in answer and "END OF TEXT" in answer:
178
- text = answer[answer.find("BEGINNING OF TEXT") + len("BEGINNING OF TEXT"):answer.find("END OF TEXT")].strip()
179
- return text
180
- return "[Error when hiding user info, no log generated]"
181
 
182
  def log_QA(question, answer):
183
  message = f"User: {question}\nAssistant: {answer}"
@@ -207,11 +190,21 @@ def chat(message, history):
207
  log_QA(message, answer)
208
  return answer
209
 
 
 
 
 
 
 
 
 
210
  if __name__ == "__main__":
211
  interface = gr.ChatInterface(
212
  fn=chat,
213
  type="messages",
214
  title="40.12 Chatbot",
215
- description="Ask me anything about social media APIs, the Digital Services Act (DSA), or online platform regulations.")
 
 
216
 
217
  interface.launch()
 
8
  from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
9
  import rsa
10
  from cryptography.fernet import Fernet
11
+ from presidio_analyzer import AnalyzerEngine
12
+ from presidio_anonymizer import AnonymizerEngine
13
 
14
  import gradio as gr
15
 
 
147
  ####### Logging ##########
148
  ##########################
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
+ analyzer = AnalyzerEngine()
152
+ anonymizer = AnonymizerEngine()
153
 
154
+ def hide_sensitive_info(text):
155
+ analysis = analyzer.analyze(
156
+ text=text,
157
+ language="en",
158
+ )
159
+ result = anonymizer.anonymize(
160
+ text=text,
161
+ analyzer_results=analysis
162
+ )
163
+ return result.text
 
 
164
 
165
  def log_QA(question, answer):
166
  message = f"User: {question}\nAssistant: {answer}"
 
190
  log_QA(message, answer)
191
  return answer
192
 
193
+ examples = [
194
+ "What is Article 40.12 of the Digital Services Act, and how does it help researchers?",
195
+ "How can I start the process of requesting platform data for research?",
196
+ "How do I submit a data access request for Meta’s API under the DSA?",
197
+ "What are the authentication and setup steps for Youtube’s API?",
198
+ "What specific types of data can I access through Snapchat’s API?"
199
+ ]
200
+
201
  if __name__ == "__main__":
202
  interface = gr.ChatInterface(
203
  fn=chat,
204
  type="messages",
205
  title="40.12 Chatbot",
206
+ description="Ask me anything about social media APIs, the Digital Services Act (DSA), or online platform regulations.",
207
+ examples=examples
208
+ )
209
 
210
  interface.launch()