Zamanonymize3

Build error

App Files Files Community

kcelia committed on Mar 30, 2024

Commit

dc83cd7

unverified ·

1 Parent(s): 67fa189

chore: fix chatgpt step

Browse files

Files changed (1) hide show

app.py +21 -21

app.py CHANGED Viewed

@@ -132,7 +132,7 @@ def encrypt_query_fn(query):
         encrypted_tokens.append(encrypted_x)
-    print(f"Data encrypted ✅ on Client Side")
     assert len({len(token) for token in encrypted_tokens}) == 1
@@ -355,7 +355,7 @@ def decrypt_fn(text) -> Dict:
     else:
         identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
-    print(f"Decryption done ✅ on Client Side")
     return anonymized_text, identified_df
@@ -380,25 +380,20 @@ def anonymization_with_fn(query):
 def query_chatgpt_fn(anonymized_query, anonymized_document):
-    evaluation_key_path = KEYS_DIR / "evaluation_key"
-    if not evaluation_key_path.is_file():
         error_message = "Error ❌: Please generate the key first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
-    encryted_query_path = KEYS_DIR / "encrypted_quantized_query"
-    if not encryted_query_path.is_file():
         error_message = "Error ❌: Please encrypt your query first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
-    decrypted_query_path = KEYS_DIR / "reconstructed_sentence"
-    if not decrypted_query_path.is_file():
-        error_message = "Error ❌: Please run the FHE computation first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
     prompt = read_txt(PROMPT_PATH)
     # Prepare prompt
-    full_prompt = prompt + "\n"
     query = (
         "Document content:\n```\n"
         + anonymized_document
@@ -407,7 +402,7 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
         + anonymized_query
         + "\n```"
     )
-    print(full_prompt)
     completion = client.chat.completions.create(
         model="gpt-4-1106-preview",  # Replace with "gpt-4" if available
@@ -438,7 +433,9 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
         else:
             processed_tokens.append(token)
     deanonymized_response = "".join(processed_tokens)
-    return anonymized_response, deanonymized_response
 demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
@@ -473,8 +470,7 @@ with demo:
     with gr.Accordion("What is encrypted anonymization?", open=False):
         gr.Markdown(
-            """
-        Anonymization is the process of removing personally identifiable information (PII)
         from data to protect individual privacy.
         To resolve trust issues when deploying anonymization as a cloud service, Fully Homomorphic
@@ -507,13 +503,16 @@ with demo:
     ########################## Main document Part ##########################
     gr.Markdown("## Step 2: Private document")
     with gr.Row():
         with gr.Column():
             gr.Markdown("**Original document:**")
             gr.Markdown(
-                """This document was retrieved from the [Microsoft Presidio](https://huggingface.co/spaces/presidio/presidio_demo) demo.\n\n
                 You can select and deselect sentences to customize the document that will be used
                 as the initial prompt for ChatGPT in step 5.
                 """
@@ -522,7 +521,7 @@ with demo:
             gr.Markdown("**Anonymized document:**")
             gr.Markdown(
                 """You can see below the anonymized text, replaced with hexademical strings, that
-                will be sent to ChatGPT.
                 ChatGPT will then be able to answer any queries about the document.
                 """
@@ -554,7 +553,8 @@ with demo:
     gr.Markdown(
         """Now, you can formulate a query. Please choose from the predefined options in
-        “Queries examples” or craft a custom question in the “Customized query” text box.
         Remain concise and relevant to the context. Any off-topic query will not be processed.
         """

         encrypted_tokens.append(encrypted_x)
+    print("Data encrypted ✅ on Client Side")
     assert len({len(token) for token in encrypted_tokens}) == 1
     else:
         identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
+    print("Decryption done ✅ on Client Side")
     return anonymized_text, identified_df
 def query_chatgpt_fn(anonymized_query, anonymized_document):
+    print("------------ Step 5: ChatGPT communication")
+    if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
         error_message = "Error ❌: Please generate the key first!"
+        return {chatgpt_response_anonymized: gr.update(value=error_message)}
+    if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
         error_message = "Error ❌: Please encrypt your query first!"
+        return {chatgpt_response_anonymized: gr.update(value=error_message)}
     prompt = read_txt(PROMPT_PATH)
     # Prepare prompt
+    initial_prompt = prompt + "\n"
     query = (
         "Document content:\n```\n"
         + anonymized_document
         + anonymized_query
         + "\n```"
     )
+    print(f'initial_prompt:\n{initial_prompt}')
     completion = client.chat.completions.create(
         model="gpt-4-1106-preview",  # Replace with "gpt-4" if available
         else:
             processed_tokens.append(token)
     deanonymized_response = "".join(processed_tokens)
+    return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
+            chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
 demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
     with gr.Accordion("What is encrypted anonymization?", open=False):
         gr.Markdown(
+        """Anonymization is the process of removing personally identifiable information (PII)
         from data to protect individual privacy.
         To resolve trust issues when deploying anonymization as a cloud service, Fully Homomorphic
     ########################## Main document Part ##########################
+    gr.Markdown("<hr />")
     gr.Markdown("## Step 2: Private document")
     with gr.Row():
         with gr.Column():
             gr.Markdown("**Original document:**")
             gr.Markdown(
+                """This document was retrieved from the
+                [Microsoft Presidio](https://huggingface.co/spaces/presidio/presidio_demo) demo.
                 You can select and deselect sentences to customize the document that will be used
                 as the initial prompt for ChatGPT in step 5.
                 """
             gr.Markdown("**Anonymized document:**")
             gr.Markdown(
                 """You can see below the anonymized text, replaced with hexademical strings, that
+                will be sent to ChatGPT.
                 ChatGPT will then be able to answer any queries about the document.
                 """
     gr.Markdown(
         """Now, you can formulate a query. Please choose from the predefined options in
+        <span style='color:grey'>“Queries examples”</span>" or craft a custom question in
+        the <span style='color:grey'>“Customized query”</span>" text box.
         Remain concise and relevant to the context. Any off-topic query will not be processed.
         """