add popup html + report humanizer section
Files changed:
- ai_generate.py: +29 −34
- app.py: +238 −34
ai_generate.py
CHANGED
@@ -85,9 +85,8 @@ Return a citation for every quote across all articles that justify the text. Rem
 The entire text should be wrapped in one cited_text. For References section (if asked by prompt), don't add citations.
 For source id, give a valid integer alone without a key.
 Here are the sources:{context}"""
-xml_prompt = ChatPromptTemplate.from_messages(
-    [("system", xml_system), ("human", "{input}")]
-)
+xml_prompt = ChatPromptTemplate.from_messages([("system", xml_system), ("human", "{input}")])
+

 def format_docs_xml(docs: list[Document]) -> str:
     formatted = []
@@ -106,8 +105,7 @@ def get_doc_content(docs, id):


 def remove_citations(text):
-    text = re.sub(r
-    text = re.sub(r'[\d+]', '', text)
+    text = re.sub(r"<\d+>", "", text)
     return text


@@ -115,24 +113,24 @@ def display_cited_text(data):
     combined_text = ""
     citations = {}
     # Iterate through the cited_text list
-    if
-        for item in data[
-            if
-                chunk_text = item[
+    if "cited_text" in data:
+        for item in data["cited_text"]:
+            if "chunk" in item and len(item["chunk"]) > 0:
+                chunk_text = item["chunk"][0].get("text")
                 combined_text += chunk_text
                 citation_ids = []
                 # Process the citations for the chunk
-                if len(item[
-                    for c in item[
-                        if c and
-                            citation = c[
+                if len(item["chunk"]) > 1 and item["chunk"][1]["citations"]:
+                    for c in item["chunk"][1]["citations"]:
+                        if c and "citation" in c:
+                            citation = c["citation"]
                             if isinstance(citation, dict) and "source_id" in citation:
-                                citation = citation[
+                                citation = citation["source_id"]
                             if isinstance(citation, str):
                                 try:
                                     citation_ids.append(int(citation))
                                 except ValueError:
-                                    pass
+                                    pass  # Handle cases where the string is not a valid integer
                 if citation_ids:
                     citation_texts = [f"<{cid}>" for cid in citation_ids]
                     combined_text += " " + "".join(citation_texts)
@@ -144,24 +142,27 @@ def get_citations(data, docs):
     # Initialize variables for the combined text and a dictionary for citations
     citations = {}
     # Iterate through the cited_text list
-    if data.get(
-        for item in data[
+    if data.get("cited_text"):
+        for item in data["cited_text"]:
             citation_ids = []
-            if
-                for c in item[
-                    if c and
-                        citation = c[
+            if "chunk" in item and len(item["chunk"]) > 1 and item["chunk"][1].get("citations"):
+                for c in item["chunk"][1]["citations"]:
+                    if c and "citation" in c:
+                        citation = c["citation"]
                         if isinstance(citation, dict) and "source_id" in citation:
-                            citation = citation[
+                            citation = citation["source_id"]
                         if isinstance(citation, str):
                             try:
                                 citation_ids.append(int(citation))
                             except ValueError:
-                                pass
+                                pass  # Handle cases where the string is not a valid integer
             # Store unique citations in a dictionary
             for citation_id in citation_ids:
                 if citation_id not in citations:
-                    citations[citation_id] = {
+                    citations[citation_id] = {
+                        "source": docs[citation_id].metadata["source"],
+                        "content": docs[citation_id].page_content,
+                    }

     return citations

@@ -243,16 +244,12 @@ def generate_rag(
     docs = retriever.get_relevant_documents(topic)

     formatted_docs = format_docs_xml(docs)
-    rag_chain = (
-        RunnablePassthrough.assign(context=lambda _: formatted_docs)
-        | xml_prompt
-        | llm
-        | XMLOutputParser()
-    )
+    rag_chain = RunnablePassthrough.assign(context=lambda _: formatted_docs) | xml_prompt | llm | XMLOutputParser()
     result = rag_chain.invoke({"input": prompt})
     citations = get_citations(result, docs)
     return result, citations

+
 def generate_base(
     prompt: str, topic: str, model: str, temperature: float, max_length: int, api_key: str, sys_message=""
 ):
@@ -262,9 +259,7 @@ def generate_base(
         return None, None
     try:
         output = llm.invoke(prompt).content
-        output_dict = {'cited_text': [
-            {'chunk': [{'text': output}, {'citations': None}]}
-        ]}
+        output_dict = {"cited_text": [{"chunk": [{"text": output}, {"citations": None}]}]}
         return output_dict, None
     except Exception as e:
         print(f"An error occurred while running the model: {e}")
@@ -285,4 +280,4 @@ def generate(
     if path or url_content:
         return generate_rag(prompt, topic, model, url_content, path, temperature, max_length, api_key, sys_message)
     else:
-        return generate_base(prompt, topic, model, temperature, max_length, api_key, sys_message)
+        return generate_base(prompt, topic, model, temperature, max_length, api_key, sys_message)
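For reference, a minimal sketch of the parsed result shape these helpers traverse, inferred from the code above (generate_base now builds the same structure by hand); the sample text and source id are invented for illustration:

    # Hypothetical result in the shape display_cited_text / get_citations expect.
    sample_result = {
        "cited_text": [
            {"chunk": [{"text": "Solar capacity grew rapidly in 2023."},
                       {"citations": [{"citation": {"source_id": "0"}}]}]},
            {"chunk": [{"text": " Costs continued to fall."}, {"citations": None}]},
        ]
    }

    print(display_cited_text(sample_result))
    # -> "Solar capacity grew rapidly in 2023. <0> Costs continued to fall."
    # get_citations(sample_result, docs) would then map 0 to docs[0].metadata["source"]
    # and docs[0].page_content, which feed the popup HTML in app.py.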
app.py
CHANGED
@@ -22,6 +22,11 @@ from humanize import humanize_text, device
 from ai_generate import generate, citations_to_html, remove_citations, display_cited_text
 import nltk

+import uuid
+import json
+from datetime import datetime
+from google.cloud import storage
+
 nltk.download("punkt_tab")

 print(f"Using device: {device}")
@@ -43,6 +48,161 @@ tokenizers = {
 tool = language_tool_python.LanguageTool("en-US")


+def generate_cited_html(cited_text, citations: dict):
+    cited_text = cited_text.replace("\n", "<br>")
+    html_code = """
+    <style>
+        .reference-container {
+            position: relative;
+            display: inline-block;
+        }
+        .reference-btn {
+            display: inline-block;
+            width: 25px;
+            height: 25px;
+            border-radius: 50%;
+            background-color: #0000EE; /* Blue color for the button */
+            color: white;
+            text-align: center;
+            line-height: 25px;
+            cursor: pointer;
+            font-weight: bold;
+            margin-right: 5px;
+            transition: background-color 0.3s ease, transform 0.3s ease;
+        }
+        .reference-btn:hover {
+            background-color: #1e90ff; /* Lighter blue on hover */
+            transform: scale(1.1); /* Slightly enlarge on hover */
+        }
+        .reference-popup {
+            display: none;
+            position: absolute;
+            z-index: 1;
+            top: 100%;
+            left: 0;
+            background-color: #f9f9f9;
+            border: 1px solid #ddd;
+            padding: 10px;
+            border-radius: 4px;
+            box-shadow: 0 2px 5px rgba(0,0,0,0.2);
+            width: calc(min(90vw, 500px));
+            max-height: calc(min(80vh, 300px));
+            overflow-y: auto;
+            transform: translateX(0); /* Default position */
+        }
+        .reference-container .reference-popup {
+            left: 50%;
+            transform: translateX(-50%); /* Center align popup horizontally by default */
+        }
+        .reference-container[data-align="right"] .reference-popup {
+            left: auto;
+            right: 0;
+            transform: translateX(-10%); /* Pull the popup slightly left when near right edge */
+        }
+        .reference-popup .close-btn {
+            float: right;
+            cursor: pointer;
+            font-weight: bold;
+            color: white;
+            font-size: 16px;
+            padding: 0;
+            width: 20px;
+            height: 20px;
+            text-align: center;
+            line-height: 20px;
+            background-color: #ff4c4c;
+            border-radius: 2px;
+            transition: transform 0.3s ease, background-color 0.3s ease;
+        }
+        .reference-popup .close-btn:hover {
+            transform: scale(1.2);
+            background-color: #ff3333;
+        }
+        input[type="radio"] {
+            position: absolute;
+            opacity: 0;
+            pointer-events: none;
+        }
+        input[type="radio"]:checked + .reference-popup {
+            display: block;
+        }
+        @media (prefers-color-scheme: dark) {
+            .reference-btn {
+                background-color: #1e90ff;
+            }
+            .reference-popup {
+                background-color: #2c2c2c;
+                border-color: #444;
+                color: #f1f1f1;
+            }
+            .reference-popup .close-btn {
+                background-color: #ff4c4c;
+            }
+            .reference-popup .close-btn:hover {
+                background-color: #ff3333;
+            }
+        }
+    </style>
+    <script>
+        document.addEventListener('click', (event) => {
+            const containers = document.querySelectorAll('.reference-container');
+            containers.forEach(container => {
+                const rect = container.getBoundingClientRect();
+                if (rect.right > window.innerWidth - 50) {
+                    container.setAttribute('data-align', 'right');
+                } else if (rect.left < 50) {
+                    container.setAttribute('data-align', 'left');
+                } else {
+                    container.removeAttribute('data-align');
+                }
+            });
+        });
+
+        function closeReferencePanes(event) {
+            if (!event.target.closest('.reference-container')) {
+                const checkboxes = document.querySelectorAll('input[type="radio"]');
+                checkboxes.forEach(checkbox => checkbox.checked = false);
+            }
+        }
+
+        document.addEventListener('click', closeReferencePanes);
+    </script>
+    <div style="height: 600px; overflow-y: auto;">
+    """
+
+    # Function to replace each citation with a reference button
+    def replace_citations(match):
+        citation_id = match.group(1)  # Extract citation number from the match
+        ref_data = citations.get(int(citation_id))
+
+        # If reference data is not found, return the original text
+        if not ref_data:
+            return match.group(0)
+
+        # Split the content by newlines and wrap each in <p> tags to maintain paragraph structure
+        paragraphs = ref_data["content"].split("\n")
+        formatted_content = "".join(f"<p>{para.strip()}</p>" for para in paragraphs if para.strip())
+
+        # HTML code for the reference button and popup with paragraph formatting
+        button_html = f"""
+        <span class="reference-container">
+            <label for="ref-toggle-{citation_id}" class="reference-btn" onclick="closeReferencePanes(); document.getElementById('ref-toggle-{citation_id}').checked = true;">{citation_id}</label>
+            <input type="radio" id="ref-toggle-{citation_id}" name="reference" />
+            <span class="reference-popup">
+                <span class="close-btn" onclick="document.getElementById('ref-toggle-{citation_id}').checked = false;">×</span>
+                <strong>Source:</strong> {ref_data['source']}<br>
+                <strong>Content:</strong> {formatted_content}
+            </span>
+        </span>
+        """
+        return button_html
+
+    # Replace inline citations in the text with the generated HTML
+    html_code += re.sub(r"<(\d+)>", replace_citations, cited_text)
+    html_code += "</div>"
+    return html_code
+
+
 # Function to move model to the appropriate device
 def to_device(model):
     return model.to(device)
@@ -256,7 +416,8 @@ def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
     return score, text, mc_score


-def ai_check(
+def ai_check(history: list, option: str):
+    text = history[-1][1]
     if option.startswith("Polygraf AI"):
         return highlighter_polygraf(text, option)
     else:
@@ -382,15 +543,15 @@ def generate_article(
         api_key=api_key,
         sys_message="",
     )
-    return article,
+    return article, citations


 def get_history(history):
-    return history
-
-
-
-
+    # return history
+    history_formatted = []
+    for entry in history:
+        history_formatted.append((entry[0], entry[1]))
+    return history_formatted


 def clear_history():
@@ -409,8 +570,9 @@ def humanize(
 ) -> str:
     print("Humanizing text...")
     # body, references = split_text_from_refs(text)
-
-
+    cited_text = history[-1][1]
+    citations = history[-1][2]
+    article = humanize_text(
         text=cited_text,
         model_name=model,
         temperature=temperature,
@@ -420,9 +582,22 @@ def humanize(
     )
     # result = result + references
     # corrected_text = format_and_correct_language_check(result)
+    article = clean_text(article)
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    history.append((f"Humanized Text | {timestamp}\nInput: {model}",
-
+    history.append((f"Humanized Text | {timestamp}\nInput: {model}", article, citations))
+    latest_humanizer_data = {
+        "original text": cited_text,
+        "humanized text": article,
+        "citations": citations,  # can remove saving citations
+        "metadata": {
+            "temperature": temperature,
+            "repetition_penalty": repetition_penalty,
+            "top_k": top_k,
+            "length_penalty": length_penalty,
+        },
+        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+    }
+    return generate_cited_html(article, citations), history, latest_humanizer_data


 def update_visibility_api(model: str):
@@ -458,11 +633,6 @@ def update_temperature(model_dropdown):
     return gr.update(value=1.0, interactive=True)


-import uuid
-import json
-from datetime import datetime
-from google.cloud import storage
-
 # Initialize Google Cloud Storage client
 client = storage.Client()
 bucket_name = "ai-source-detection"
@@ -537,6 +707,31 @@ def save_to_cloud_storage(
     return f"Data saved as {file_name} in GCS."


+def save_humanizer_feedback_to_cloud_storage(data, humanizer_feedback):
+    """Save generated article and metadata to Google Cloud Storage within a specific folder."""
+    if data:
+        try:
+            data["user_feedback"] = humanizer_feedback
+            # Create a unique filename
+            file_id = str(uuid.uuid4())
+
+            # Define the file path and name in the bucket
+            folder_path = "ai-writer/humanizer-feedback/"
+            file_name = f"{folder_path}{data['timestamp'].replace(' ', '_').replace(':', '-')}_{file_id}.json"
+
+            # Convert data to JSON string
+            json_data = json.dumps(data)
+
+            # Create a blob and upload to GCS
+            blob = bucket.blob(file_name)
+            blob.upload_from_string(json_data, content_type="application/json")
+            gr.Info("Successfully reported. Thank you for the feedback!")
+        except Exception:
+            gr.Warning("Report not saved.")
+    else:
+        gr.Warning("Nothing humanized to save yet!")
+
+
 def generate_and_format(
     input_role,
     topic,
@@ -610,9 +805,10 @@ def generate_and_format(
         # for url in url_content.keys():
         #     article += f"\n{url}"

+    article = clean_text(display_cited_text(article))
     # reference_formatted = format_references(article)
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    history.append((f"Generated Text | {timestamp}\nInput: {topic}",
+    history.append((f"Generated Text | {timestamp}\nInput: {topic}", article, citations))

     # Save the article and metadata to Cloud Storage
     # We dont save if there is PDF input for privacy reasons
@@ -641,8 +837,7 @@ def generate_and_format(
         timestamp,
     )
     print(save_message)
-
-    return clean_text(display_cited_text(article)), citations, history
+    return generate_cited_html(article, citations), history


 def create_interface():
@@ -655,6 +850,7 @@ def create_interface():
         """,
     ) as demo:
         history = gr.State([])
+        latest_humanizer_data = gr.State()
         today = date.today()
         # dd/mm/YY
         d1 = today.strftime("%d/%B/%Y")
@@ -869,15 +1065,20 @@ def create_interface():

             with gr.Column(scale=3):
                 with gr.Tab("Text Generator"):
-                    output_article = gr.
-
-
-                    ai_comments = gr.Textbox(
-                        label="Add comments to help edit generated text", interactive=True, visible=False
+                    output_article = gr.HTML(
+                        value="""<div style="height: 600px;"></div>""",
+                        label="Generated Article",
                     )
-
-
-
+                    humanizer_feedback = gr.Textbox(label="Add optional feedback on humanizer")
+                    report_humanized_btn = gr.Button("Report Humanized Text", variant="primary", visible=True)
+                    with gr.Accordion("Regenerate Article", open=False):
+                        ai_comments = gr.Textbox(
+                            label="Add comments to help edit generated text", interactive=True, visible=True
+                        )
+                        regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=True)
+
+                    ai_detector_dropdown = gr.Dropdown(
+                        choices=ai_check_options, label="Select AI Detector", value="Polygraf AI (Base Model)"
                     )
                     ai_check_btn = gr.Button("AI Check")

@@ -939,15 +1140,18 @@ def create_interface():
                 return gr.update(visible=False)

         google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
-        ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
-        output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
-        ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
+        # ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
+        # output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
+        # ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
         ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)

         # Update the default structure based on the selected format
         # e.g. "Plain Text" for certain formats
         input_format.change(fn=update_structure, inputs=input_format, outputs=input_structure)
         model_dropdown.change(fn=update_temperature, inputs=model_dropdown, outputs=temperature_slider)
+        report_humanized_btn.click(
+            save_humanizer_feedback_to_cloud_storage, inputs=[latest_humanizer_data, humanizer_feedback]
+        )

         generate_btn.click(
             fn=generate_and_format,
@@ -981,7 +1185,7 @@ def create_interface():
                 pdf_file_input,
                 history,
             ],
-            outputs=[output_article,
+            outputs=[output_article, history],
         )

         regenerate_btn.click(
@@ -1018,12 +1222,12 @@ def create_interface():
                 exclude_sites,
                 ai_comments,
             ],
-            outputs=[output_article,
+            outputs=[output_article, history],
         )

         ai_check_btn.click(
             fn=ai_check,
-            inputs=[
+            inputs=[history, ai_detector_dropdown],
             outputs=[ai_check_result, highlighted_text, mc_check_result],
         )
@@ -1038,7 +1242,7 @@ def create_interface():
                 length_penalty_slider,
                 history,
             ],
-            outputs=[output_article, history],
+            outputs=[output_article, history, latest_humanizer_data],
         )

         generate_btn.click(get_history, inputs=[history], outputs=[history_chat])
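A hedged usage sketch for the new popup flow, assuming the integer-keyed citations dict that get_citations returns; the article text and source name below are made up:

    # Hypothetical inputs: one cited sentence and one source entry.
    citations = {0: {"source": "solar_report.pdf", "content": "Global solar capacity grew by 32% in 2023."}}
    article = "Solar capacity grew rapidly in 2023. <0>"

    html = generate_cited_html(article, citations)
    # Each <0> marker is replaced by a numbered .reference-btn; its hidden radio input
    # toggles the .reference-popup showing the source name and content. The returned
    # HTML string is what the gr.HTML output_article component renders.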