Spaces:
Runtime error
Runtime error
pushed minko branch
Browse files
app.py
CHANGED
@@ -13,33 +13,41 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipe
|
|
13 |
from scipy.special import softmax
|
14 |
from collections import defaultdict
|
15 |
import nltk
|
16 |
-
from utils import remove_special_characters
|
17 |
|
18 |
# Check if CUDA is available
|
19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
print(f"Using device: {device}")
|
21 |
|
22 |
models = {
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
}
|
26 |
tokenizers = {
|
27 |
-
|
28 |
-
|
29 |
}
|
30 |
|
|
|
31 |
# Function to move model to the appropriate device
|
32 |
def to_device(model):
    """Call ``model.to`` with the module-level ``device`` and return its result."""
    moved = model.to(device)
    return moved
|
34 |
|
|
|
35 |
def copy_to_input(text):
    """Return ``text`` unchanged (identity pass-through)."""
    passthrough = text
    return passthrough
|
37 |
|
|
|
38 |
def remove_bracketed_numbers(text):
    """Strip a leading ``[<digits>]`` marker from ``text``.

    The pattern is anchored with ``^`` and no regex flags are passed, so only a
    marker at the very start of the string is removed.
    """
    return re.sub(r"^\[\d+\]", "", text)
|
42 |
|
|
|
43 |
def clean_text(text: str) -> str:
|
44 |
paragraphs = text.split("\n\n")
|
45 |
cleaned_paragraphs = []
|
@@ -49,6 +57,7 @@ def clean_text(text: str) -> str:
|
|
49 |
cleaned_paragraphs.append(cleaned)
|
50 |
return "\n".join(cleaned_paragraphs)
|
51 |
|
|
|
52 |
def format_and_correct(text: str) -> str:
|
53 |
prompt = f"""
|
54 |
Please correct the formatting, grammar, and spelling errors in the following text without changing its content significantly. Ensure proper paragraph breaks and maintain the original content:
|
@@ -57,6 +66,7 @@ def format_and_correct(text: str) -> str:
|
|
57 |
corrected_text = generate(prompt, "Groq", None)
|
58 |
return clean_text(corrected_text)
|
59 |
|
|
|
60 |
def format_and_correct_para(text: str) -> str:
|
61 |
paragraphs = text.split("\n")
|
62 |
corrected_paragraphs = []
|
@@ -66,6 +76,7 @@ def format_and_correct_para(text: str) -> str:
|
|
66 |
corrected_text = "\n\n".join(corrected_paragraphs)
|
67 |
return corrected_text
|
68 |
|
|
|
69 |
def format_and_correct_language_check(text: str) -> str:
    """Return ``text`` with LanguageTool's ``en-US`` corrections applied.

    A fresh ``LanguageTool`` instance is created for every call. It is closed
    before returning so the LanguageTool backend it started is not left running
    after the correction has been produced.

    Args:
        text: Text to correct.

    Returns:
        The corrected text as returned by ``LanguageTool.correct``.
    """
    tool = language_tool_python.LanguageTool("en-US")
    try:
        return tool.correct(text)
    finally:
        # Release the backend even when correct() raises.
        tool.close()
|
@@ -86,60 +97,79 @@ def predict(model, tokenizer, text):
|
|
86 |
output = model(**tokens)
|
87 |
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
|
88 |
output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
|
89 |
-
return output_norm
|
90 |
|
91 |
-
|
|
|
92 |
return predict(models[model], tokenizers[model], text)
|
93 |
|
94 |
-
|
|
|
|
|
95 |
sentences = nltk.sent_tokenize(text)
|
96 |
num_sentences = len(sentences)
|
97 |
scores = defaultdict(list)
|
|
|
98 |
overall_scores = []
|
99 |
-
|
|
|
100 |
for i in range(num_sentences):
|
101 |
-
chunk =
|
102 |
-
if chunk:
|
|
|
103 |
result = ai_generated_test(chunk, model)
|
104 |
-
score = result[
|
105 |
-
for j in range(i, min(i+3, num_sentences)):
|
106 |
scores[j].append(score)
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
overall_score = sum(overall_scores) / len(overall_scores)
|
120 |
overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
|
121 |
-
return overall_score, "
|
|
|
122 |
|
123 |
# Detector display names. The UI passes this list as the `choices` of the
# detector selector, and the same strings are the keys of `models` / `tokenizers`.
ai_check_options = [
    "Polygraf AI Watson (Base Model)",
    "Polygraf AI Sherlock (Advanced Model)",
]
|
127 |
|
|
|
128 |
def ai_generated_test_sapling(text: str) -> Dict:
    """Score ``text`` with the Sapling AI-detection HTTP API.

    Args:
        text: Text to send to the detector.

    Returns:
        ``{"AI": score, "HUMAN": 1 - score}`` where ``score`` is the ``"score"``
        field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the response JSON has no ``"score"`` field.
    """
    import os  # local import so this block does not depend on the file's import list

    # NOTE(review): the fallback literal is the key that used to be hard-coded here.
    # It is kept only so existing deployments keep working; it should be rotated and
    # supplied exclusively through the SAPLING_API_KEY environment variable.
    api_key = os.environ.get("SAPLING_API_KEY", "60L9BPSVPIIOEZM0CD1DQWRBPJIUR7SB")
    response = requests.post(
        "https://api.sapling.ai/api/v1/aidetect",
        json={"key": api_key, "text": f"{text}"},
        timeout=30,  # without a timeout a stalled request would block the caller indefinitely
    )
    # Fail with a clear HTTP error instead of a confusing KeyError on error payloads.
    response.raise_for_status()
    ai_score = response.json()["score"]  # parse the body once
    return {"AI": ai_score, "HUMAN": 1 - ai_score}
|
134 |
|
|
|
135 |
class GPT2PPL:
|
136 |
def __init__(self):
|
137 |
self.device = device
|
138 |
-
self.model = to_device(GPT2LMHeadModel.from_pretrained(
|
139 |
-
self.tokenizer = GPT2TokenizerFast.from_pretrained(
|
140 |
|
141 |
def __call__(self, text):
|
142 |
-
encodings = self.tokenizer(text, return_tensors=
|
143 |
encodings = {k: v.to(self.device) for k, v in encodings.items()}
|
144 |
max_length = self.model.config.n_positions
|
145 |
stride = 512
|
@@ -163,15 +193,18 @@ class GPT2PPL:
|
|
163 |
ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
|
164 |
return {"AI": float(ppl), "HUMAN": 1 - float(ppl)}
|
165 |
|
|
|
166 |
def ai_generated_test_gptzero(text):
    """Score ``text`` with a newly constructed ``GPT2PPL`` instance.

    Returns:
        ``(scores, None)`` where ``scores`` is whatever calling the ``GPT2PPL``
        instance on ``text`` returned. The scores are also printed to stdout.
    """
    # NOTE(review): a new GPT2PPL is built on every call, as in the original.
    scores = GPT2PPL()(text)
    print(scores)
    return scores, None
|
171 |
|
|
|
172 |
def highlighter_polygraf(text, model="Polygraf AI Watson (Base Model)"):
    """Forward ``text`` and ``model`` to ``process_text`` and return its result."""
    highlighted = process_text(text=text, model=model)
    return highlighted
|
174 |
|
|
|
175 |
def ai_check(text: str, option: str):
|
176 |
if option.startswith("Polygraf AI"):
|
177 |
return highlighter_polygraf(text, option)
|
@@ -211,6 +244,7 @@ def generate_prompt(settings: Dict[str, str]) -> str:
|
|
211 |
"""
|
212 |
return prompt
|
213 |
|
|
|
214 |
def regenerate_prompt(settings: Dict[str, str]) -> str:
|
215 |
prompt = f"""
|
216 |
"{settings['generated_article']}"
|
@@ -228,6 +262,7 @@ def regenerate_prompt(settings: Dict[str, str]) -> str:
|
|
228 |
"""
|
229 |
return prompt
|
230 |
|
|
|
231 |
def generate_article(
|
232 |
topic: str,
|
233 |
keywords: str,
|
@@ -290,6 +325,7 @@ def generate_article(
|
|
290 |
|
291 |
return clean_text(article)
|
292 |
|
|
|
293 |
def humanize(
|
294 |
text: str,
|
295 |
model: str,
|
@@ -308,12 +344,14 @@ def humanize(
|
|
308 |
)
|
309 |
return format_and_correct_language_check(result)
|
310 |
|
|
|
311 |
def update_visibility_api(model: str):
    """Build a Gradio visibility update keyed on the selected model name.

    Returns ``gr.update(visible=True)`` when ``model`` is ``"OpenAI GPT 3.5"`` or
    ``"OpenAI GPT 4"``, and ``gr.update(visible=False)`` for anything else.
    """
    is_openai_model = model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]
    return gr.update(visible=is_openai_model)
|
316 |
|
|
|
317 |
def format_references(text: str) -> str:
|
318 |
lines = text.split("\n")
|
319 |
references = []
|
@@ -336,6 +374,7 @@ def format_references(text: str) -> str:
|
|
336 |
|
337 |
return "\n\n".join(article_text) + "\n\nReferences:\n" + "\n".join(formatted_refs)
|
338 |
|
|
|
339 |
def generate_and_format(
|
340 |
topic,
|
341 |
keywords,
|
@@ -374,6 +413,7 @@ def generate_and_format(
|
|
374 |
)
|
375 |
return format_references(article)
|
376 |
|
|
|
377 |
def create_interface():
|
378 |
with gr.Blocks(
|
379 |
theme=gr.themes.Default(
|
@@ -422,7 +462,7 @@ def create_interface():
|
|
422 |
step=50,
|
423 |
value=1000,
|
424 |
label="Article Length",
|
425 |
-
elem_classes="input-highlight-pink"
|
426 |
)
|
427 |
|
428 |
with gr.Row():
|
@@ -554,14 +594,14 @@ def create_interface():
|
|
554 |
label="Add comments to help edit generated text", interactive=True, visible=False
|
555 |
)
|
556 |
regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=False)
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
ai_check_result = gr.Label(label="AI Check Result")
|
564 |
-
|
565 |
humanize_btn = gr.Button("Humanize")
|
566 |
# humanized_output = gr.Textbox(label="Humanized Article", lines=20, elem_classes=["custom-textbox"])
|
567 |
humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
|
@@ -582,6 +622,7 @@ def create_interface():
|
|
582 |
ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
|
583 |
output_article.change(become_visible, inputs=output_article, outputs=ai_comments)
|
584 |
ai_comments.change(become_visible, inputs=output_article, outputs=regenerate_btn)
|
|
|
585 |
|
586 |
generate_btn.click(
|
587 |
fn=generate_and_format,
|
@@ -658,4 +699,4 @@ def create_interface():
|
|
658 |
if __name__ == "__main__":
    # Build the interface and serve it, listening on all network interfaces.
    demo = create_interface()
    # Alternative launch kept for reference (share link enabled, port 7890):
    # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
    demo.launch(server_name="0.0.0.0")
|
|
|
13 |
from scipy.special import softmax
|
14 |
from collections import defaultdict
|
15 |
import nltk
|
16 |
+
from utils import remove_special_characters
|
17 |
|
18 |
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Detector display name -> checkpoint id, stated once and shared by both dicts below.
_DETECTOR_CHECKPOINTS = {
    "Polygraf AI Watson (Base Model)": "polygraf-ai/bc-roberta-openai-2sent",
    "Polygraf AI Sherlock (Advanced Model)": "polygraf-ai/bc_combined_3sent",
}

# Classification models, loaded eagerly and moved to `device`.
models = {
    name: AutoModelForSequenceClassification.from_pretrained(checkpoint).to(device)
    for name, checkpoint in _DETECTOR_CHECKPOINTS.items()
}
# Matching tokenizers, keyed by the same display names.
tokenizers = {
    name: AutoTokenizer.from_pretrained(checkpoint)
    for name, checkpoint in _DETECTOR_CHECKPOINTS.items()
}
|
34 |
|
35 |
+
|
36 |
# Function to move model to the appropriate device
|
37 |
def to_device(model):
    """Call ``model.to`` with the module-level ``device`` and return its result."""
    moved = model.to(device)
    return moved
|
39 |
|
40 |
+
|
41 |
def copy_to_input(text):
    """Return ``text`` unchanged (identity pass-through)."""
    passthrough = text
    return passthrough
|
43 |
|
44 |
+
|
45 |
def remove_bracketed_numbers(text):
    """Strip a leading ``[<digits>]`` marker from ``text``.

    The pattern is anchored with ``^`` and no regex flags are passed, so only a
    marker at the very start of the string is removed.
    """
    return re.sub(r"^\[\d+\]", "", text)
|
49 |
|
50 |
+
|
51 |
def clean_text(text: str) -> str:
|
52 |
paragraphs = text.split("\n\n")
|
53 |
cleaned_paragraphs = []
|
|
|
57 |
cleaned_paragraphs.append(cleaned)
|
58 |
return "\n".join(cleaned_paragraphs)
|
59 |
|
60 |
+
|
61 |
def format_and_correct(text: str) -> str:
|
62 |
prompt = f"""
|
63 |
Please correct the formatting, grammar, and spelling errors in the following text without changing its content significantly. Ensure proper paragraph breaks and maintain the original content:
|
|
|
66 |
corrected_text = generate(prompt, "Groq", None)
|
67 |
return clean_text(corrected_text)
|
68 |
|
69 |
+
|
70 |
def format_and_correct_para(text: str) -> str:
|
71 |
paragraphs = text.split("\n")
|
72 |
corrected_paragraphs = []
|
|
|
76 |
corrected_text = "\n\n".join(corrected_paragraphs)
|
77 |
return corrected_text
|
78 |
|
79 |
+
|
80 |
def format_and_correct_language_check(text: str) -> str:
    """Return ``text`` with LanguageTool's ``en-US`` corrections applied.

    A fresh ``LanguageTool`` instance is created for every call. It is closed
    before returning so the LanguageTool backend it started is not left running
    after the correction has been produced.

    Args:
        text: Text to correct.

    Returns:
        The corrected text as returned by ``LanguageTool.correct``.
    """
    tool = language_tool_python.LanguageTool("en-US")
    try:
        return tool.correct(text)
    finally:
        # Release the backend even when correct() raises.
        tool.close()
|
|
|
97 |
output = model(**tokens)
|
98 |
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
|
99 |
output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
|
100 |
+
return output_norm
|
101 |
|
102 |
+
|
103 |
+
def ai_generated_test(text, model="Polygraf AI Watson (Base Model)"):
    """Classify ``text`` with the named detector.

    The default used to be ``"BC Original"``, which is not a key of ``models`` or
    ``tokenizers``, so calling this function without ``model`` always raised
    ``KeyError``. The default now names a detector that is actually loaded.

    Args:
        text: Text to score.
        model: Key into the module-level ``models`` / ``tokenizers`` dicts.

    Returns:
        Whatever ``predict`` returns for the selected model/tokenizer pair.

    Raises:
        KeyError: If ``model`` is not a key of ``models`` / ``tokenizers``.
    """
    return predict(models[model], tokenizers[model], text)
|
105 |
|
106 |
+
|
107 |
+
def process_text(text, model="Polygraf AI Watson (Base Model)"):
    """Score ``text`` sentence by sentence and build highlighted HTML.

    Each sentence is scored as part of every sliding window of up to three
    consecutive sentences that contains it; its final score is the mean of those
    window scores. Sentences whose mean is at least 0.65 are wrapped in a red
    ``<span>``.

    Args:
        text: Text to analyse. Blank lines are ignored; every other line is
            treated as a paragraph.
        model: Detector name, forwarded to ``ai_generated_test``. The previous
            default, ``"BC Original"``, is not a loaded detector and always failed.

    Returns:
        A tuple ``(overall, html)``. ``overall`` is ``{"HUMAN": 1 - s, "AI": s}``
        with ``s`` the mean of the per-sentence scores. ``html`` is the paragraphs
        joined with ``<br><br>`` and passed through ``format_references``. When
        nothing could be scored (e.g. empty text), ``overall`` holds ``0.0`` for
        both labels and ``html`` is ``""`` instead of raising ``ZeroDivisionError``.
    """
    # Tokenize once, paragraph by paragraph, and reuse the same sentence list for
    # both scoring and rendering. Tokenizing the whole text for scoring and each
    # paragraph separately for rendering can yield different sentence counts, which
    # attaches scores to the wrong sentences.
    paragraphs = [p for p in text.split("\n") if p.strip()]
    paragraph_sentences = [nltk.sent_tokenize(p) for p in paragraphs]
    sentences = [s for group in paragraph_sentences for s in group]
    num_sentences = len(sentences)

    # Score each window of up to 3 sentences and credit every sentence in it.
    scores = defaultdict(list)
    for i in range(num_sentences):
        chunk = " ".join(sentences[i : i + 3])
        if chunk:
            score = ai_generated_test(chunk, model)["AI"]
            for j in range(i, min(i + 3, num_sentences)):
                scores[j].append(score)

    overall_scores = []
    colored_paragraphs = []
    index = 0
    for group in paragraph_sentences:
        colored_sentences = []
        for sentence in group:
            # .get() avoids inserting empty entries into the defaultdict.
            sentence_scores = scores.get(index)
            index += 1
            if not sentence_scores:
                # No score available: show the sentence as-is and leave it out of the mean.
                colored_sentences.append(sentence)
                continue
            avg_score = sum(sentence_scores) / len(sentence_scores)
            overall_scores.append(avg_score)
            if avg_score >= 0.65:
                colored_sentences.append(f"<span style='background-color:red;'>{sentence}</span>")
            else:
                colored_sentences.append(sentence)
        colored_paragraphs.append(" ".join(colored_sentences))

    if not overall_scores:
        return {"HUMAN": 0.0, "AI": 0.0}, ""

    overall_score = sum(overall_scores) / len(overall_scores)
    overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
    return overall_score, format_references("<br><br>".join(colored_paragraphs))
|
150 |
+
|
151 |
|
152 |
# Detector display names. The UI passes this list as the `choices` of the
# detector selector, and the same strings are the keys of `models` / `tokenizers`.
ai_check_options = [
    "Polygraf AI Watson (Base Model)",
    "Polygraf AI Sherlock (Advanced Model)",
]
|
156 |
|
157 |
+
|
158 |
def ai_generated_test_sapling(text: str) -> Dict:
    """Score ``text`` with the Sapling AI-detection HTTP API.

    Args:
        text: Text to send to the detector.

    Returns:
        ``{"AI": score, "HUMAN": 1 - score}`` where ``score`` is the ``"score"``
        field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the response JSON has no ``"score"`` field.
    """
    import os  # local import so this block does not depend on the file's import list

    # NOTE(review): the fallback literal is the key that used to be hard-coded here.
    # It is kept only so existing deployments keep working; it should be rotated and
    # supplied exclusively through the SAPLING_API_KEY environment variable.
    api_key = os.environ.get("SAPLING_API_KEY", "60L9BPSVPIIOEZM0CD1DQWRBPJIUR7SB")
    response = requests.post(
        "https://api.sapling.ai/api/v1/aidetect",
        json={"key": api_key, "text": f"{text}"},
        timeout=30,  # without a timeout a stalled request would block the caller indefinitely
    )
    # Fail with a clear HTTP error instead of a confusing KeyError on error payloads.
    response.raise_for_status()
    ai_score = response.json()["score"]  # parse the body once
    return {"AI": ai_score, "HUMAN": 1 - ai_score}
|
163 |
|
164 |
+
|
165 |
class GPT2PPL:
|
166 |
def __init__(self):
|
167 |
self.device = device
|
168 |
+
self.model = to_device(GPT2LMHeadModel.from_pretrained("gpt2"))
|
169 |
+
self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
|
170 |
|
171 |
def __call__(self, text):
|
172 |
+
encodings = self.tokenizer(text, return_tensors="pt")
|
173 |
encodings = {k: v.to(self.device) for k, v in encodings.items()}
|
174 |
max_length = self.model.config.n_positions
|
175 |
stride = 512
|
|
|
193 |
ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
|
194 |
return {"AI": float(ppl), "HUMAN": 1 - float(ppl)}
|
195 |
|
196 |
+
|
197 |
def ai_generated_test_gptzero(text):
    """Score ``text`` with a newly constructed ``GPT2PPL`` instance.

    Returns:
        ``(scores, None)`` where ``scores`` is whatever calling the ``GPT2PPL``
        instance on ``text`` returned. The scores are also printed to stdout.
    """
    # NOTE(review): a new GPT2PPL is built on every call, as in the original.
    scores = GPT2PPL()(text)
    print(scores)
    return scores, None
|
202 |
|
203 |
+
|
204 |
def highlighter_polygraf(text, model="Polygraf AI Watson (Base Model)"):
    """Forward ``text`` and ``model`` to ``process_text`` and return its result."""
    highlighted = process_text(text=text, model=model)
    return highlighted
|
206 |
|
207 |
+
|
208 |
def ai_check(text: str, option: str):
|
209 |
if option.startswith("Polygraf AI"):
|
210 |
return highlighter_polygraf(text, option)
|
|
|
244 |
"""
|
245 |
return prompt
|
246 |
|
247 |
+
|
248 |
def regenerate_prompt(settings: Dict[str, str]) -> str:
|
249 |
prompt = f"""
|
250 |
"{settings['generated_article']}"
|
|
|
262 |
"""
|
263 |
return prompt
|
264 |
|
265 |
+
|
266 |
def generate_article(
|
267 |
topic: str,
|
268 |
keywords: str,
|
|
|
325 |
|
326 |
return clean_text(article)
|
327 |
|
328 |
+
|
329 |
def humanize(
|
330 |
text: str,
|
331 |
model: str,
|
|
|
344 |
)
|
345 |
return format_and_correct_language_check(result)
|
346 |
|
347 |
+
|
348 |
def update_visibility_api(model: str):
    """Build a Gradio visibility update keyed on the selected model name.

    Returns ``gr.update(visible=True)`` when ``model`` is ``"OpenAI GPT 3.5"`` or
    ``"OpenAI GPT 4"``, and ``gr.update(visible=False)`` for anything else.
    """
    is_openai_model = model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]
    return gr.update(visible=is_openai_model)
|
353 |
|
354 |
+
|
355 |
def format_references(text: str) -> str:
|
356 |
lines = text.split("\n")
|
357 |
references = []
|
|
|
374 |
|
375 |
return "\n\n".join(article_text) + "\n\nReferences:\n" + "\n".join(formatted_refs)
|
376 |
|
377 |
+
|
378 |
def generate_and_format(
|
379 |
topic,
|
380 |
keywords,
|
|
|
413 |
)
|
414 |
return format_references(article)
|
415 |
|
416 |
+
|
417 |
def create_interface():
|
418 |
with gr.Blocks(
|
419 |
theme=gr.themes.Default(
|
|
|
462 |
step=50,
|
463 |
value=1000,
|
464 |
label="Article Length",
|
465 |
+
elem_classes="input-highlight-pink",
|
466 |
)
|
467 |
|
468 |
with gr.Row():
|
|
|
594 |
label="Add comments to help edit generated text", interactive=True, visible=False
|
595 |
)
|
596 |
regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=False)
|
597 |
+
ai_detector_dropdown = gr.Radio(
|
598 |
+
choices=ai_check_options, label="Select AI Detector", value="Polygraf AI"
|
599 |
+
)
|
600 |
+
ai_check_btn = gr.Button("AI Check")
|
601 |
+
|
602 |
+
with gr.Accordion("AI Detection Results", open=True):
|
603 |
ai_check_result = gr.Label(label="AI Check Result")
|
604 |
+
highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
|
605 |
humanize_btn = gr.Button("Humanize")
|
606 |
# humanized_output = gr.Textbox(label="Humanized Article", lines=20, elem_classes=["custom-textbox"])
|
607 |
humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
|
|
|
622 |
ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
|
623 |
output_article.change(become_visible, inputs=output_article, outputs=ai_comments)
|
624 |
ai_comments.change(become_visible, inputs=output_article, outputs=regenerate_btn)
|
625 |
+
ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
|
626 |
|
627 |
generate_btn.click(
|
628 |
fn=generate_and_format,
|
|
|
699 |
if __name__ == "__main__":
    # Build the interface and serve it, listening on all network interfaces.
    demo = create_interface()
    # Alternative launch kept for reference (share link enabled, port 7890):
    # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
    demo.launch(server_name="0.0.0.0")
|