Spaces:
Sleeping
Sleeping
chore: fix chatgpt step
Browse files
app.py
CHANGED
@@ -132,7 +132,7 @@ def encrypt_query_fn(query):
|
|
132 |
|
133 |
encrypted_tokens.append(encrypted_x)
|
134 |
|
135 |
-
print(
|
136 |
|
137 |
assert len({len(token) for token in encrypted_tokens}) == 1
|
138 |
|
@@ -355,7 +355,7 @@ def decrypt_fn(text) -> Dict:
|
|
355 |
else:
|
356 |
identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
|
357 |
|
358 |
-
print(
|
359 |
|
360 |
return anonymized_text, identified_df
|
361 |
|
@@ -380,25 +380,20 @@ def anonymization_with_fn(query):
|
|
380 |
|
381 |
def query_chatgpt_fn(anonymized_query, anonymized_document):
|
382 |
|
383 |
-
|
384 |
-
|
|
|
385 |
error_message = "Error ❌: Please generate the key first!"
|
386 |
-
return {
|
387 |
|
388 |
-
|
389 |
-
if not encryted_query_path.is_file():
|
390 |
error_message = "Error ❌: Please encrypt your query first!"
|
391 |
-
return {
|
392 |
-
|
393 |
-
decrypted_query_path = KEYS_DIR / "reconstructed_sentence"
|
394 |
-
if not decrypted_query_path.is_file():
|
395 |
-
error_message = "Error ❌: Please run the FHE computation first!"
|
396 |
-
return {anonymized_text_output: gr.update(value=error_message)}
|
397 |
|
398 |
prompt = read_txt(PROMPT_PATH)
|
399 |
|
400 |
# Prepare prompt
|
401 |
-
|
402 |
query = (
|
403 |
"Document content:\n```\n"
|
404 |
+ anonymized_document
|
@@ -407,7 +402,7 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
|
|
407 |
+ anonymized_query
|
408 |
+ "\n```"
|
409 |
)
|
410 |
-
print(
|
411 |
|
412 |
completion = client.chat.completions.create(
|
413 |
model="gpt-4-1106-preview", # Replace with "gpt-4" if available
|
@@ -438,7 +433,9 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
|
|
438 |
else:
|
439 |
processed_tokens.append(token)
|
440 |
deanonymized_response = "".join(processed_tokens)
|
441 |
-
|
|
|
|
|
442 |
|
443 |
|
444 |
demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
|
@@ -473,8 +470,7 @@ with demo:
|
|
473 |
|
474 |
with gr.Accordion("What is encrypted anonymization?", open=False):
|
475 |
gr.Markdown(
|
476 |
-
|
477 |
-
Anonymization is the process of removing personally identifiable information (PII)
|
478 |
from data to protect individual privacy.
|
479 |
|
480 |
To resolve trust issues when deploying anonymization as a cloud service, Fully Homomorphic
|
@@ -507,13 +503,16 @@ with demo:
|
|
507 |
|
508 |
########################## Main document Part ##########################
|
509 |
|
|
|
510 |
gr.Markdown("## Step 2: Private document")
|
511 |
|
512 |
with gr.Row():
|
513 |
with gr.Column():
|
514 |
gr.Markdown("**Original document:**")
|
515 |
gr.Markdown(
|
516 |
-
"""This document was retrieved from the
|
|
|
|
|
517 |
You can select and deselect sentences to customize the document that will be used
|
518 |
as the initial prompt for ChatGPT in step 5.
|
519 |
"""
|
@@ -522,7 +521,7 @@ with demo:
|
|
522 |
gr.Markdown("**Anonymized document:**")
|
523 |
gr.Markdown(
|
524 |
"""You can see below the anonymized text, replaced with hexadecimal strings, that
|
525 |
-
will be sent to ChatGPT.
|
526 |
|
527 |
ChatGPT will then be able to answer any queries about the document.
|
528 |
"""
|
@@ -554,7 +553,8 @@ with demo:
|
|
554 |
|
555 |
gr.Markdown(
|
556 |
"""Now, you can formulate a query. Please choose from the predefined options in
|
557 |
-
|
|
|
558 |
|
559 |
Remain concise and relevant to the context. Any off-topic query will not be processed.
|
560 |
"""
|
|
|
132 |
|
133 |
encrypted_tokens.append(encrypted_x)
|
134 |
|
135 |
+
print("Data encrypted ✅ on Client Side")
|
136 |
|
137 |
assert len({len(token) for token in encrypted_tokens}) == 1
|
138 |
|
|
|
355 |
else:
|
356 |
identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
|
357 |
|
358 |
+
print("Decryption done ✅ on Client Side")
|
359 |
|
360 |
return anonymized_text, identified_df
|
361 |
|
|
|
380 |
|
381 |
def query_chatgpt_fn(anonymized_query, anonymized_document):
|
382 |
|
383 |
+
print("------------ Step 5: ChatGPT communication")
|
384 |
+
|
385 |
+
if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
|
386 |
error_message = "Error ❌: Please generate the key first!"
|
387 |
+
return {chatgpt_response_anonymized: gr.update(value=error_message)}
|
388 |
|
389 |
+
if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
|
|
|
390 |
error_message = "Error ❌: Please encrypt your query first!"
|
391 |
+
return {chatgpt_response_anonymized: gr.update(value=error_message)}
|
|
|
|
|
|
|
|
|
|
|
392 |
|
393 |
prompt = read_txt(PROMPT_PATH)
|
394 |
|
395 |
# Prepare prompt
|
396 |
+
initial_prompt = prompt + "\n"
|
397 |
query = (
|
398 |
"Document content:\n```\n"
|
399 |
+ anonymized_document
|
|
|
402 |
+ anonymized_query
|
403 |
+ "\n```"
|
404 |
)
|
405 |
+
print(f'initial_prompt:\n{initial_prompt}')
|
406 |
|
407 |
completion = client.chat.completions.create(
|
408 |
model="gpt-4-1106-preview", # Replace with "gpt-4" if available
|
|
|
433 |
else:
|
434 |
processed_tokens.append(token)
|
435 |
deanonymized_response = "".join(processed_tokens)
|
436 |
+
|
437 |
+
return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
|
438 |
+
chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
|
439 |
|
440 |
|
441 |
demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
|
|
|
470 |
|
471 |
with gr.Accordion("What is encrypted anonymization?", open=False):
|
472 |
gr.Markdown(
|
473 |
+
"""Anonymization is the process of removing personally identifiable information (PII)
|
|
|
474 |
from data to protect individual privacy.
|
475 |
|
476 |
To resolve trust issues when deploying anonymization as a cloud service, Fully Homomorphic
|
|
|
503 |
|
504 |
########################## Main document Part ##########################
|
505 |
|
506 |
+
gr.Markdown("<hr />")
|
507 |
gr.Markdown("## Step 2: Private document")
|
508 |
|
509 |
with gr.Row():
|
510 |
with gr.Column():
|
511 |
gr.Markdown("**Original document:**")
|
512 |
gr.Markdown(
|
513 |
+
"""This document was retrieved from the
|
514 |
+
[Microsoft Presidio](https://huggingface.co/spaces/presidio/presidio_demo) demo.
|
515 |
+
|
516 |
You can select and deselect sentences to customize the document that will be used
|
517 |
as the initial prompt for ChatGPT in step 5.
|
518 |
"""
|
|
|
521 |
gr.Markdown("**Anonymized document:**")
|
522 |
gr.Markdown(
|
523 |
"""You can see below the anonymized text, replaced with hexadecimal strings, that
|
524 |
+
will be sent to ChatGPT.
|
525 |
|
526 |
ChatGPT will then be able to answer any queries about the document.
|
527 |
"""
|
|
|
553 |
|
554 |
gr.Markdown(
|
555 |
"""Now, you can formulate a query. Please choose from the predefined options in
|
556 |
+
<span style='color:grey'>“Queries examples”</span> or craft a custom question in
|
557 |
+
the <span style='color:grey'>“Customized query”</span> text box.
|
558 |
|
559 |
Remain concise and relevant to the context. Any off-topic query will not be processed.
|
560 |
"""
|