minko186 committed on
Commit 89644d7 · 1 Parent(s): c180684

pushed minko branch

Files changed (1)
  1. app.py +80 -39
app.py CHANGED
@@ -13,33 +13,41 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipe
 from scipy.special import softmax
 from collections import defaultdict
 import nltk
-from utils import remove_special_characters
+from utils import remove_special_characters
 
 # Check if CUDA is available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
 models = {
-    'Polygraf AI Watson (Base Model)': AutoModelForSequenceClassification.from_pretrained('polygraf-ai/bc-roberta-openai-2sent').to(device),
-    'Polygraf AI Sherlock (Advanced Model)': AutoModelForSequenceClassification.from_pretrained('polygraf-ai/bc_combined_3sent').to(device),
+    "Polygraf AI Watson (Base Model)": AutoModelForSequenceClassification.from_pretrained(
+        "polygraf-ai/bc-roberta-openai-2sent"
+    ).to(device),
+    "Polygraf AI Sherlock (Advanced Model)": AutoModelForSequenceClassification.from_pretrained(
+        "polygraf-ai/bc_combined_3sent"
+    ).to(device),
 }
 tokenizers = {
-    'Polygraf AI Watson (Base Model)': AutoTokenizer.from_pretrained('polygraf-ai/bc-roberta-openai-2sent'),
-    'Polygraf AI Sherlock (Advanced Model)': AutoTokenizer.from_pretrained('polygraf-ai/bc_combined_3sent'),
+    "Polygraf AI Watson (Base Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc-roberta-openai-2sent"),
+    "Polygraf AI Sherlock (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
 }
 
+
 # Function to move model to the appropriate device
 def to_device(model):
     return model.to(device)
 
+
 def copy_to_input(text):
     return text
 
+
 def remove_bracketed_numbers(text):
     pattern = r"^\[\d+\]"
     cleaned_text = re.sub(pattern, "", text)
     return cleaned_text
 
+
 def clean_text(text: str) -> str:
     paragraphs = text.split("\n\n")
     cleaned_paragraphs = []
@@ -49,6 +57,7 @@ def clean_text(text: str) -> str:
         cleaned_paragraphs.append(cleaned)
     return "\n".join(cleaned_paragraphs)
 
+
 def format_and_correct(text: str) -> str:
     prompt = f"""
     Please correct the formatting, grammar, and spelling errors in the following text without changing its content significantly. Ensure proper paragraph breaks and maintain the original content:
@@ -57,6 +66,7 @@ def format_and_correct(text: str) -> str:
     corrected_text = generate(prompt, "Groq", None)
     return clean_text(corrected_text)
 
+
 def format_and_correct_para(text: str) -> str:
     paragraphs = text.split("\n")
     corrected_paragraphs = []
@@ -66,6 +76,7 @@ def format_and_correct_para(text: str) -> str:
     corrected_text = "\n\n".join(corrected_paragraphs)
     return corrected_text
 
+
 def format_and_correct_language_check(text: str) -> str:
     tool = language_tool_python.LanguageTool("en-US")
     return tool.correct(text)
@@ -86,60 +97,79 @@ def predict(model, tokenizer, text):
     output = model(**tokens)
     output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
     output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
-    return output_norm
+    return output_norm
 
-def ai_generated_test(text, model='BC Original'):
+
+def ai_generated_test(text, model="BC Original"):
     return predict(models[model], tokenizers[model], text)
 
-def process_text(text, model='BC Original'):
+
+def process_text(text, model="BC Original"):
+    # sentences = split_into_sentences(text)
     sentences = nltk.sent_tokenize(text)
     num_sentences = len(sentences)
     scores = defaultdict(list)
+
     overall_scores = []
-
+
+    # Process each chunk of 3 sentences and store the score for each sentence in the chunk
     for i in range(num_sentences):
-        chunk = ' '.join(sentences[i:i+3])
-        if chunk:
+        chunk = " ".join(sentences[i : i + 3])
+        if chunk:
+            # result = classifier(chunk)
             result = ai_generated_test(chunk, model)
-            score = result['AI']
-            for j in range(i, min(i+3, num_sentences)):
+            score = result["AI"]
+            for j in range(i, min(i + 3, num_sentences)):
                 scores[j].append(score)
 
-    colored_sentences = []
-    for i, sentence in enumerate(sentences):
-        if scores[i]:
-            avg_score = sum(scores[i]) / len(scores[i])
-            if avg_score >= 0.65:
-                colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
-            else:
-                colored_sentence = sentence
-            colored_sentences.append(colored_sentence)
-            overall_scores.append(avg_score)
-
+    # Calculate the average score for each sentence and apply color coding
+    paragraphs = text.split("\n")
+    paragraphs = [s for s in paragraphs if s.strip()]
+    colored_paragraphs = []
+    i = 0
+    for paragraph in paragraphs:
+        temp_sentences = nltk.sent_tokenize(paragraph)
+        colored_sentences = []
+        for sentence in temp_sentences:
+            if scores[i]:
+                avg_score = sum(scores[i]) / len(scores[i])
+                if avg_score >= 0.65:
+                    colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
+                else:
+                    colored_sentence = sentence
+                colored_sentences.append(colored_sentence)
+                overall_scores.append(avg_score)
+            i = i + 1
+        combined_sentences = " ".join(colored_sentences)
+        print(combined_sentences)
+        colored_paragraphs.append(combined_sentences)
+
     overall_score = sum(overall_scores) / len(overall_scores)
     overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
-    return overall_score, " ".join(colored_sentences)
+    return overall_score, format_references("<br><br>".join(colored_paragraphs))
+
 
 ai_check_options = [
     "Polygraf AI Watson (Base Model)",
     "Polygraf AI Sherlock (Advanced Model)",
 ]
 
+
 def ai_generated_test_sapling(text: str) -> Dict:
     response = requests.post(
-        "https://api.sapling.ai/api/v1/aidetect",
-        json={"key": "60L9BPSVPIIOEZM0CD1DQWRBPJIUR7SB", "text": f"{text}"}
+        "https://api.sapling.ai/api/v1/aidetect", json={"key": "60L9BPSVPIIOEZM0CD1DQWRBPJIUR7SB", "text": f"{text}"}
    )
     return {"AI": response.json()["score"], "HUMAN": 1 - response.json()["score"]}
 
+
 class GPT2PPL:
     def __init__(self):
         self.device = device
-        self.model = to_device(GPT2LMHeadModel.from_pretrained('gpt2'))
-        self.tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
+        self.model = to_device(GPT2LMHeadModel.from_pretrained("gpt2"))
+        self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 
     def __call__(self, text):
-        encodings = self.tokenizer(text, return_tensors='pt')
+        encodings = self.tokenizer(text, return_tensors="pt")
         encodings = {k: v.to(self.device) for k, v in encodings.items()}
         max_length = self.model.config.n_positions
         stride = 512
@@ -163,15 +193,18 @@ class GPT2PPL:
         ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
         return {"AI": float(ppl), "HUMAN": 1 - float(ppl)}
 
+
 def ai_generated_test_gptzero(text):
     gptzero_model = GPT2PPL()
     result = gptzero_model(text)
     print(result)
     return result, None
 
+
 def highlighter_polygraf(text, model="Polygraf AI Watson (Base Model)"):
     return process_text(text=text, model=model)
 
+
 def ai_check(text: str, option: str):
     if option.startswith("Polygraf AI"):
         return highlighter_polygraf(text, option)
@@ -211,6 +244,7 @@ def generate_prompt(settings: Dict[str, str]) -> str:
     """
     return prompt
 
+
 def regenerate_prompt(settings: Dict[str, str]) -> str:
     prompt = f"""
     "{settings['generated_article']}"
@@ -228,6 +262,7 @@ def regenerate_prompt(settings: Dict[str, str]) -> str:
     """
     return prompt
 
+
 def generate_article(
     topic: str,
     keywords: str,
@@ -290,6 +325,7 @@ def generate_article(
 
     return clean_text(article)
 
+
 def humanize(
     text: str,
     model: str,
@@ -308,12 +344,14 @@ def humanize(
     )
     return format_and_correct_language_check(result)
 
+
 def update_visibility_api(model: str):
     if model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]:
         return gr.update(visible=True)
     else:
         return gr.update(visible=False)
 
+
 def format_references(text: str) -> str:
     lines = text.split("\n")
     references = []
@@ -336,6 +374,7 @@ def format_references(text: str) -> str:
 
     return "\n\n".join(article_text) + "\n\nReferences:\n" + "\n".join(formatted_refs)
 
+
 def generate_and_format(
     topic,
     keywords,
@@ -374,6 +413,7 @@
     )
     return format_references(article)
 
+
 def create_interface():
     with gr.Blocks(
         theme=gr.themes.Default(
@@ -422,7 +462,7 @@ def create_interface():
                 step=50,
                 value=1000,
                 label="Article Length",
-                elem_classes="input-highlight-pink"
+                elem_classes="input-highlight-pink",
            )
 
            with gr.Row():
@@ -554,14 +594,14 @@ def create_interface():
                 label="Add comments to help edit generated text", interactive=True, visible=False
            )
            regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=False)
-            with gr.Row():
-                with gr.Column():
-                    ai_detector_dropdown = gr.Radio(
-                        choices=ai_check_options, label="Select AI Detector", value="Polygraf AI Watson (Base Model)"
-                    )
-                    ai_check_btn = gr.Button("AI Check")
+            ai_detector_dropdown = gr.Radio(
+                choices=ai_check_options, label="Select AI Detector", value="Polygraf AI"
+            )
+            ai_check_btn = gr.Button("AI Check")
+
+            with gr.Accordion("AI Detection Results", open=True):
                 ai_check_result = gr.Label(label="AI Check Result")
-            highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
+                highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
            humanize_btn = gr.Button("Humanize")
            # humanized_output = gr.Textbox(label="Humanized Article", lines=20, elem_classes=["custom-textbox"])
            humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
@@ -582,6 +622,7 @@ def create_interface():
        ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
        output_article.change(become_visible, inputs=output_article, outputs=ai_comments)
        ai_comments.change(become_visible, inputs=output_article, outputs=regenerate_btn)
+       ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
 
        generate_btn.click(
            fn=generate_and_format,
@@ -658,4 +699,4 @@ def create_interface():
 if __name__ == "__main__":
     demo = create_interface()
     # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
-    demo.launch(server_name="0.0.0.0")
+    demo.launch(server_name="0.0.0.0")
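
For reference, a minimal standalone sketch of the 3-sentence sliding-window scoring that process_text uses in this commit. toy_scorer is a hypothetical stand-in for ai_generated_test and the Polygraf classifiers; only the windowing, per-sentence averaging, and the 0.65 flagging threshold from the diff are shown.

# Sketch of the sliding-window sentence scoring (assumes nltk with the punkt data installed).
from collections import defaultdict

import nltk


def toy_scorer(chunk: str) -> dict:
    # Hypothetical scorer standing in for the real classifier call.
    return {"AI": min(len(chunk) / 500, 1.0)}


def score_sentences(text: str, threshold: float = 0.65):
    sentences = nltk.sent_tokenize(text)
    scores = defaultdict(list)
    # Each sentence anchors a window of up to 3 sentences; every sentence in the
    # window receives that window's score, so most sentences collect 3 votes.
    for i in range(len(sentences)):
        chunk = " ".join(sentences[i : i + 3])
        if chunk:
            score = toy_scorer(chunk)["AI"]
            for j in range(i, min(i + 3, len(sentences))):
                scores[j].append(score)
    # Average the votes per sentence and flag the ones above the threshold.
    averages = [sum(scores[i]) / len(scores[i]) for i in range(len(sentences))]
    return [(s, avg, avg >= threshold) for s, avg in zip(sentences, averages)]


if __name__ == "__main__":
    nltk.download("punkt", quiet=True)
    for sentence, avg, flagged in score_sentences("One sentence. Another one. A third. A fourth."):
        print(f"{avg:.2f} {'AI?' if flagged else 'ok '} {sentence}")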
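
The GPT2PPL path can be read the same way: the score returned by ai_generated_test_gptzero is GPT-2 perplexity over the input. A simplified single-pass version (short texts only) looks roughly like this; the class in the diff additionally walks long inputs with a 512-token stride and sums the negative log-likelihoods before exponentiating.

# Rough single-window sketch of the perplexity check behind ai_generated_test_gptzero.
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")


def perplexity(text: str) -> float:
    encodings = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        # Feeding the input ids as labels yields the mean negative log-likelihood,
        # so exp(loss) is the perplexity of the text under GPT-2.
        loss = model(encodings.input_ids, labels=encodings.input_ids).loss
    return float(torch.exp(loss))


print(perplexity("The quick brown fox jumps over the lazy dog."))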