Spaces:
Sleeping
Sleeping
chore: update Marketing v2
Browse files
- app.py +48 -31
- files/anonymized_document.txt +7 -6
- files/encrypted_document.txt +6 -0
- files/mapping_clear_to_anonymized.pkl +2 -2
- files/original_document.txt +7 -6
- files/original_document_uuid_mapping.json +8 -17
- utils_demo.py +6 -5
app.py
CHANGED
@@ -32,8 +32,10 @@ time.sleep(3)
|
|
32 |
# Load data from files required for the application
|
33 |
UUID_MAP = read_json(MAPPING_UUID_PATH)
|
34 |
ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
|
35 |
-
|
|
|
36 |
ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
|
|
|
37 |
|
38 |
# 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
|
39 |
|
@@ -44,9 +46,9 @@ ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
|
|
44 |
USER_ID = numpy.random.randint(0, 2**32)
|
45 |
|
46 |
|
47 |
-
def
|
48 |
|
49 |
-
selected_sentences = [
|
50 |
|
51 |
anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
|
52 |
|
@@ -90,6 +92,16 @@ def key_gen_fn() -> Dict:
|
|
90 |
print("Keys have been generated ✅")
|
91 |
return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
def encrypt_query_fn(query):
|
95 |
|
@@ -141,10 +153,10 @@ def encrypt_query_fn(query):
|
|
141 |
KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
|
142 |
)
|
143 |
|
144 |
-
encrypted_quant_tokens_hex = [token.hex()[500:
|
145 |
|
146 |
return {
|
147 |
-
output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex)),
|
148 |
anonymized_text_output: gr.update(visible=True, value=None),
|
149 |
identified_words_output_df: gr.update(visible=False, value=None),
|
150 |
}
|
@@ -510,28 +522,26 @@ with demo:
|
|
510 |
)
|
511 |
|
512 |
with gr.Row():
|
513 |
-
with gr.Column():
|
514 |
-
gr.Markdown("**Original document:**")
|
515 |
-
|
516 |
-
with gr.Column():
|
517 |
-
gr.Markdown("**Encrypted document:**")
|
518 |
-
|
519 |
-
|
520 |
-
with gr.Row():
|
521 |
-
with gr.Column():
|
522 |
original_sentences_box = gr.CheckboxGroup(
|
523 |
ORIGINAL_DOCUMENT,
|
524 |
value=ORIGINAL_DOCUMENT,
|
525 |
-
|
|
|
526 |
)
|
527 |
|
528 |
-
with gr.Column():
|
|
|
|
|
|
|
|
|
529 |
anonymized_doc_box = gr.Textbox(
|
530 |
-
|
|
|
531 |
)
|
532 |
|
533 |
original_sentences_box.change(
|
534 |
-
fn=
|
535 |
inputs=[original_sentences_box],
|
536 |
outputs=[anonymized_doc_box],
|
537 |
)
|
@@ -541,12 +551,9 @@ with demo:
|
|
541 |
gr.Markdown("<hr />")
|
542 |
gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
|
543 |
"""Please choose from the predefined options in
|
544 |
-
<span style='color:grey'>“Prompt examples”</span>
|
545 |
-
the <span style='color:grey'>“Customized prompt”</span>
|
546 |
-
|
547 |
-
Remain concise and relevant to the context. Any off-topic query will not be processed.
|
548 |
-
"""
|
549 |
-
)
|
550 |
|
551 |
with gr.Row():
|
552 |
with gr.Column(scale=5):
|
@@ -559,7 +566,7 @@ with demo:
|
|
559 |
gr.Markdown("Or")
|
560 |
|
561 |
query_box = gr.Textbox(
|
562 |
-
value="What is
|
563 |
)
|
564 |
|
565 |
default_query_box.change(
|
@@ -592,9 +599,19 @@ with demo:
|
|
592 |
|
593 |
run_fhe_btn = gr.Button("Anonymize using FHE")
|
594 |
|
595 |
-
|
596 |
-
|
597 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
598 |
|
599 |
identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
|
600 |
|
@@ -618,7 +635,7 @@ with demo:
|
|
618 |
########################## ChatGpt Part ##########################
|
619 |
|
620 |
gr.Markdown("<hr />")
|
621 |
-
gr.Markdown("##
|
622 |
gr.Markdown(
|
623 |
"""After securely anonymizing the query with FHE,
|
624 |
you can forward it to ChatGPT without having any concern about information leakage."""
|
@@ -627,9 +644,9 @@ with demo:
|
|
627 |
chatgpt_button = gr.Button("Query ChatGPT")
|
628 |
|
629 |
with gr.Row():
|
630 |
-
chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=
|
631 |
chatgpt_response_deanonymized = gr.Textbox(
|
632 |
-
label="ChatGPT's non-anonymized response:", lines=
|
633 |
)
|
634 |
|
635 |
chatgpt_button.click(
|
|
|
32 |
# Load data from files required for the application
|
33 |
UUID_MAP = read_json(MAPPING_UUID_PATH)
|
34 |
ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
|
35 |
+
MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
|
36 |
+
MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
|
37 |
ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
|
38 |
+
print(ORIGINAL_DOCUMENT)
|
39 |
|
40 |
# 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
|
41 |
|
|
|
46 |
USER_ID = numpy.random.randint(0, 2**32)
|
47 |
|
48 |
|
49 |
+
def select_static_anonymized_sentences_fn(selected_sentences: List):
|
50 |
|
51 |
+
selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
|
52 |
|
53 |
anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
|
54 |
|
|
|
92 |
print("Keys have been generated ✅")
|
93 |
return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
|
94 |
|
95 |
+
def select_static_encrypted_sentences_fn(selected_sentences: List):
|
96 |
+
|
97 |
+
selected_sentences = [MAPPING_ENCRYPTED_SENTENCES[sentence] for sentence in selected_sentences]
|
98 |
+
|
99 |
+
anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
|
100 |
+
|
101 |
+
anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
|
102 |
+
|
103 |
+
return {encrypted_doc_box: gr.update(value="\n\n".join(anonymized_selected_sentence))}
|
104 |
+
|
105 |
|
106 |
def encrypt_query_fn(query):
|
107 |
|
|
|
153 |
KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
|
154 |
)
|
155 |
|
156 |
+
encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
|
157 |
|
158 |
return {
|
159 |
+
output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=4),
|
160 |
anonymized_text_output: gr.update(visible=True, value=None),
|
161 |
identified_words_output_df: gr.update(visible=False, value=None),
|
162 |
}
|
|
|
522 |
)
|
523 |
|
524 |
with gr.Row():
|
525 |
+
with gr.Column(scale=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
526 |
original_sentences_box = gr.CheckboxGroup(
|
527 |
ORIGINAL_DOCUMENT,
|
528 |
value=ORIGINAL_DOCUMENT,
|
529 |
+
label="Contract:",
|
530 |
+
show_label=True,
|
531 |
)
|
532 |
|
533 |
+
with gr.Column(scale=1, min_width=6):
|
534 |
+
gr.HTML("<div style='height: 77px;'></div>")
|
535 |
+
encrypt_doc_btn = gr.Button("Encrypt the document")
|
536 |
+
|
537 |
+
with gr.Column(scale=5):
|
538 |
anonymized_doc_box = gr.Textbox(
|
539 |
+
label="Encrypted document:",
|
540 |
+
show_label=True, value=ANONYMIZED_DOCUMENT, interactive=False, lines=11
|
541 |
)
|
542 |
|
543 |
original_sentences_box.change(
|
544 |
+
fn=select_static_anonymized_sentences_fn,
|
545 |
inputs=[original_sentences_box],
|
546 |
outputs=[anonymized_doc_box],
|
547 |
)
|
|
|
551 |
gr.Markdown("<hr />")
|
552 |
gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
|
553 |
"""Please choose from the predefined options in
|
554 |
+
<span style='color:grey'>“Prompt examples”</span> or craft a custom question in
|
555 |
+
the <span style='color:grey'>“Customized prompt”</span> text box.
|
556 |
+
Remain concise and relevant to the context. Any off-topic query will not be processed.""")
|
|
|
|
|
|
|
557 |
|
558 |
with gr.Row():
|
559 |
with gr.Column(scale=5):
|
|
|
566 |
gr.Markdown("Or")
|
567 |
|
568 |
query_box = gr.Textbox(
|
569 |
+
value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
|
570 |
)
|
571 |
|
572 |
default_query_box.change(
|
|
|
599 |
|
600 |
run_fhe_btn = gr.Button("Anonymize using FHE")
|
601 |
|
602 |
+
with gr.Row():
|
603 |
+
with gr.Column(scale=5):
|
604 |
+
|
605 |
+
anonymized_text_output = gr.Textbox(
|
606 |
+
label="Decrypted and anonymized document", lines=5, interactive=True
|
607 |
+
)
|
608 |
+
|
609 |
+
with gr.Column(scale=5):
|
610 |
+
|
611 |
+
anonymized_query_output = gr.Textbox(
|
612 |
+
label="Decrypted and anonymized prompt", lines=5, interactive=True
|
613 |
+
)
|
614 |
+
|
615 |
|
616 |
identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
|
617 |
|
|
|
635 |
########################## ChatGpt Part ##########################
|
636 |
|
637 |
gr.Markdown("<hr />")
|
638 |
+
gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
|
639 |
gr.Markdown(
|
640 |
"""After securely anonymizing the query with FHE,
|
641 |
you can forward it to ChatGPT without having any concern about information leakage."""
|
|
|
644 |
chatgpt_button = gr.Button("Query ChatGPT")
|
645 |
|
646 |
with gr.Row():
|
647 |
+
chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
|
648 |
chatgpt_response_deanonymized = gr.Textbox(
|
649 |
+
label="ChatGPT's non-anonymized response:", lines=5
|
650 |
)
|
651 |
|
652 |
chatgpt_button.click(
|
files/anonymized_document.txt
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
-
|
2 |
-
My credit card number is e5b499b0 and my crypto wallet id is ac41d58b.
|
3 |
|
4 |
-
|
5 |
|
6 |
-
|
7 |
|
8 |
-
|
9 |
|
10 |
-
|
|
|
|
|
|
1 |
+
Members: e3383f5b 70fc6ec5 and 2708cb61 cda521d5
|
|
|
2 |
|
3 |
+
Date: e381418b 3534158a, 96c403e5
|
4 |
|
5 |
+
Scope: 2708cb61 agrees to provide graphic design services to e3383f5b for the creation of a company logo.
|
6 |
|
7 |
+
Amount: Bob agrees to pay 2708cb61 500 upon completion and delivery of the logo.
|
8 |
|
9 |
+
Deadline: The logo design must be completed and delivered to Bob within 14 days of the contract signing date.
|
10 |
+
|
11 |
+
Payment terms: 2708cb61s international bank account N: 61294a43
|
files/encrypted_document.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
827c6bc946254b0ffe4695b4f0fc001f2383a4cf066b44312f9fa5a5733e6b6e097e69d08441b32ad93a3e84f95932e3142ce91a7502f5594eafdd6ccc2ad779e2e166ff0ac66e5e015ac64e5aac0979
|
2 |
+
a2a7ebe458d32843738fe5e550642bfb2c7efeb0f3b65892c25c0ed4a329f90e62dd97a58f6c852d0a4f2e7bae863381a325b679761570df764a0f6aec3f01a23516f0cee88972e5ac324f6af45d8e7ee
|
3 |
+
2bb53d60fd24b7e24334179c30624dd49eea11c2210e2b63d2a5fc5ea20766be019c68e061cbbad75bdb3255b1428b1dd46d361ec0261dfcbc3081d1e43aaf1bfde51f81f9036e728c3931a9e3ff4b37
|
4 |
+
3b05e1c4a0adc1a82b3d471990b60990d7d4e66132ed15fc8a18a129432e4c73c2bb74ecc89be49571090b5de8934661928c1e153dd8746858aa8ece4d59452159147f8b54da923a9eb99d1c8006b389
|
5 |
+
788027d896e0977b3012f6d50718153aac3b6642ad7e72383ecd9def3b8f4cc6c7e6851f4491eed1c5693b56dd9ac79f03fde97bf9ad0d3c6bc1c8f94e95901ceeb4a38893f2a189e63562b43a453c3a
|
6 |
+
426b376b438d9755946251b5da13f7585ab9557bec48f1e300c43cfa9e6f3f2bc1eef1a13d801161c8a59384914ffbd4da96b25dad84c51b77df73060a1319d72a2e5d4d7eea734ba72a1a6657aa93ba
|
files/mapping_clear_to_anonymized.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce0f400a2f644ddf99bcbc76f856afc1ad79055b1f01133a69e7617d257de98c
|
3 |
+
size 943
|
files/original_document.txt
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
-
|
2 |
-
My credit card number is 4095-2609-9393-4932 and my crypto wallet id is 16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ.
|
3 |
|
4 |
-
|
5 |
|
6 |
-
|
7 |
|
8 |
-
|
9 |
|
10 |
-
|
|
|
|
|
|
1 |
+
Members: David Johnson and Kate Hemingway
|
|
|
2 |
|
3 |
+
Date: February 06, 2000
|
4 |
|
5 |
+
Scope: Kate agrees to provide graphic design services to David for the creation of a company logo.
|
6 |
|
7 |
+
Amount: Bob agrees to pay Kate $500 upon completion and delivery of the logo.
|
8 |
|
9 |
+
Deadline: The logo design must be completed and delivered to Bob within 14 days of the contract signing date.
|
10 |
+
|
11 |
+
Payment terms: Kate’s international bank account N°: IL150120690000003111111
|
files/original_document_uuid_mapping.json
CHANGED
@@ -1,19 +1,10 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"
|
9 |
-
"
|
10 |
-
"954567876544": "9eb07461",
|
11 |
-
"David": "ebe99761",
|
12 |
-
"IL150120690000003111111": "5ca977a4",
|
13 |
-
"International": "71d0f51c",
|
14 |
-
"Johnson": "53a9291d",
|
15 |
-
"Kate": "b474d794",
|
16 |
-
"Maine": "6337f12f",
|
17 |
-
"microsoft.com": "0d574451",
|
18 |
-
"test@presidio.site": "1f78e797"
|
19 |
}
|
|
|
1 |
{
|
2 |
+
"06": "3534158a",
|
3 |
+
"2000": "96c403e5",
|
4 |
+
"David": "e3383f5b",
|
5 |
+
"February": "e381418b",
|
6 |
+
"Hemingway": "cda521d5",
|
7 |
+
"IL150120690000003111111": "61294a43",
|
8 |
+
"Johnson": "70fc6ec5",
|
9 |
+
"Kate": "2708cb61"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
}
|
utils_demo.py
CHANGED
@@ -18,7 +18,7 @@ from pathlib import Path
|
|
18 |
SERVER_URL = "http://localhost:8000/"
|
19 |
|
20 |
# Maximum length for user queries
|
21 |
-
MAX_USER_QUERY_LEN =
|
22 |
|
23 |
# Base Directories
|
24 |
CURRENT_DIR = Path(__file__).parent
|
@@ -38,15 +38,16 @@ LOGREG_MODEL_PATH = CURRENT_DIR / "models" / "cml_logreg.model"
|
|
38 |
ORIGINAL_FILE_PATH = DATA_PATH / "original_document.txt"
|
39 |
ANONYMIZED_FILE_PATH = DATA_PATH / "anonymized_document.txt"
|
40 |
MAPPING_UUID_PATH = DATA_PATH / "original_document_uuid_mapping.json"
|
41 |
-
|
|
|
42 |
PROMPT_PATH = DATA_PATH / "chatgpt_prompt.txt"
|
43 |
|
44 |
|
45 |
# List of example queries for easy access
|
46 |
DEFAULT_QUERIES = {
|
47 |
-
"Example Query 1": "
|
48 |
-
"Example Query 2": "
|
49 |
-
"Example Query 3": "
|
50 |
}
|
51 |
|
52 |
# Load tokenizer and model
|
|
|
18 |
SERVER_URL = "http://localhost:8000/"
|
19 |
|
20 |
# Maximum length for user queries
|
21 |
+
MAX_USER_QUERY_LEN = 128
|
22 |
|
23 |
# Base Directories
|
24 |
CURRENT_DIR = Path(__file__).parent
|
|
|
38 |
ORIGINAL_FILE_PATH = DATA_PATH / "original_document.txt"
|
39 |
ANONYMIZED_FILE_PATH = DATA_PATH / "anonymized_document.txt"
|
40 |
MAPPING_UUID_PATH = DATA_PATH / "original_document_uuid_mapping.json"
|
41 |
+
MAPPING_ANONYMIZED_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_anonymized.pkl"
|
42 |
+
MAPPING_ENCRYPTED_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_encrypted.pkl"
|
43 |
PROMPT_PATH = DATA_PATH / "chatgpt_prompt.txt"
|
44 |
|
45 |
|
46 |
# List of example queries for easy access
|
47 |
DEFAULT_QUERIES = {
|
48 |
+
"Example Query 1": "What is the amount of the contract between David and Kate?",
|
49 |
+
"Example Query 2": "What's the duration of the contract?",
|
50 |
+
"Example Query 3": "Does Kate have an international bank account?",
|
51 |
}
|
52 |
|
53 |
# Load tokenizer and model
|