eljanmahammadli committed
Commit e2a79fa · 1 Parent(s): db77dd7

added new decoder-only LM as a humanizer + UI support

Files changed (2):
1. app.py +36 -31
2. humanize.py +120 -35
app.py CHANGED
@@ -4,22 +4,21 @@ export GOOGLE_APPLICATION_CREDENTIALS="gcp_creds.json"
 """
 
 import re
-import requests
 from typing import Dict
 from collections import defaultdict
 from datetime import date, datetime
+
 import gradio as gr
-from scipy.special import softmax
-import language_tool_python
 import nltk
 import torch
 import numpy as np
-from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+from scipy.special import softmax
+import language_tool_python
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
 from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
 from google_search import google_search, months, domain_list, build_date
-from humanize import paraphrase_text, device
+from humanize import humanize_text, device
 from ai_generate import generate
 
 print(f"Using device: {device}")
@@ -115,7 +114,6 @@ def split_text_from_refs(text: str, sep="\n"):
 def ends_with_references(text):
     # Define a regular expression pattern for variations of "References:"
    pattern = re.compile(r"\b[Rr]eferences:\s*$", re.IGNORECASE | re.MULTILINE)
-
     # Check if the text ends with any form of "References:"
     return bool(pattern.search(text.strip()))
 
@@ -400,7 +398,7 @@ def humanize(
 ) -> str:
     print("Humanizing text...")
     body, references = split_text_from_refs(text)
-    result = paraphrase_text(
+    result = humanize_text(
         text=body,
         model_name=model,
         temperature=temperature,
@@ -442,6 +440,13 @@ def update_structure(format_choice):
         return gr.update(value="Introduction, Body, Conclusion", interactive=True)
 
 
+def update_temperature(model_dropdown):
+    if model_dropdown == "Standard Model":
+        return gr.update(value=1.2, interactive=True)
+    elif model_dropdown == "Advanced Model (Beta)":
+        return gr.update(value=1.0, interactive=True)
+
+
 import uuid
 import json
 from datetime import datetime
@@ -859,30 +864,6 @@ def create_interface():
                 """
             generate_btn = gr.Button("Generate Article", variant="primary")
 
-            with gr.Accordion("Advanced Humanizer Settings", open=False):
-                with gr.Row():
-                    model_dropdown = gr.Radio(
-                        choices=[
-                            "Base Model",
-                            "Large Model",
-                            "XL Model",
-                        ],
-                        value="XL Model",
-                        label="Humanizer Model Version",
-                    )
-                with gr.Row():
-                    temperature_slider = gr.Slider(
-                        minimum=0.5, maximum=2.0, step=0.1, value=1.1, label="Temperature"
-                    )
-                    top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=40, label="Top k")
-                with gr.Row():
-                    repetition_penalty_slider = gr.Slider(
-                        minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
-                    )
-                    length_penalty_slider = gr.Slider(
-                        minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
-                    )
-
         with gr.Column(scale=3):
             with gr.Tab("Text Generator"):
                 output_article = gr.Textbox(label="Generated Article", lines=20)
@@ -899,6 +880,27 @@ def create_interface():
                 ai_check_result = gr.Label(label="AI Check Result")
                 mc_check_result = gr.Label(label="Creator Check Result")
                 highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
+
+                with gr.Accordion("Advanced Humanizer Settings", open=False):
+                    with gr.Row():
+                        model_dropdown = gr.Radio(
+                            choices=["Standard Model", "Advanced Model (Beta)"],
+                            value="Advanced Model (Beta)",
+                            label="Humanizer Model Version",
+                        )
+                    with gr.Row():
+                        temperature_slider = gr.Slider(
+                            minimum=0.5, maximum=2.0, step=0.1, value=1.0, label="Temperature"
+                        )
+                        top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=40, label="Top k")
+                    with gr.Row():
+                        repetition_penalty_slider = gr.Slider(
+                            minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
+                        )
+                        length_penalty_slider = gr.Slider(
+                            minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
+                        )
+
                 humanize_btn = gr.Button("Humanize")
                 # humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
                 # copy_to_input_btn = gr.Button("Copy to Input for AI Check")
@@ -937,7 +939,10 @@ def create_interface():
         ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
         ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
 
+        # Update the default structure based on the selected format
+        # e.g. "Plain Text" for certain formats
         input_format.change(fn=update_structure, inputs=input_format, outputs=input_structure)
+        model_dropdown.change(fn=update_temperature, inputs=model_dropdown, outputs=temperature_slider)
 
         generate_btn.click(
             fn=generate_and_format,
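
Note: the new dropdown-to-slider wiring is Gradio's standard change-event pattern. A minimal self-contained sketch of the same idea, mirroring the commit's component names and defaults (the demo scaffold itself is illustrative, not part of the app):

import gradio as gr

def update_temperature(model_choice):
    # Each humanizer model gets its own default sampling temperature
    return gr.update(value=1.2 if model_choice == "Standard Model" else 1.0, interactive=True)

with gr.Blocks() as demo:
    model_dropdown = gr.Radio(
        choices=["Standard Model", "Advanced Model (Beta)"],
        value="Advanced Model (Beta)",
        label="Humanizer Model Version",
    )
    temperature_slider = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, value=1.0, label="Temperature")
    # Changing the radio selection resets the slider to that model's default
    model_dropdown.change(fn=update_temperature, inputs=model_dropdown, outputs=temperature_slider)

demo.launch()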
 
humanize.py CHANGED
@@ -1,15 +1,17 @@
 import gc
 import torch
-from nltk import sent_tokenize
 import nltk
-from tqdm import tqdm
+from nltk import sent_tokenize
 import gradio as gr
 from peft import PeftModel
 from transformers import T5ForConditionalGeneration, T5Tokenizer
 
 nltk.download("punkt")
+
+GPU_IDX = 1  # which GPU to use, starts from 0
+BATCH_SIZE = 64  # number of sentences to process in one batch
+
 # autodetect the available device
-GPU_IDX = 1  # which GPU to use
 if torch.cuda.is_available():
     num_gpus = torch.cuda.device_count()
     print(f"Number of available GPUs: {num_gpus}")
@@ -20,26 +22,34 @@ else:
     print("CUDA is not available. Using CPU instead.")
     device = torch.device("cpu")
 
-batch_size = 64
-
-# Configuration for models and their adapters
-model_config = {
-    "Base Model": "polygraf-ai/poly-humanizer-base",
-    "Large Model": "polygraf-ai/poly-humanizer-large",
-    "XL Model": "polygraf-ai/poly-humanizer-XL-merged-v2",
-}
+# ----------------------------
+# load encoder-decoder (sequence-to-sequence) language model
+seq2seq = "polygraf-ai/poly-humanizer-XL-merged-v2"
+seq2seq_model = T5ForConditionalGeneration.from_pretrained(seq2seq, torch_dtype=torch.bfloat16).to(device)
+seq2seq_tokenizer = T5Tokenizer.from_pretrained(seq2seq)
+print(f"Loaded model: {seq2seq}, Num. params: {seq2seq_model.num_parameters()}")
+# ----------------------------
+# load decoder-only (causal) language model
+from unsloth import FastLanguageModel
+from unsloth.chat_templates import get_chat_template
 
-# cache the base models, tokenizers, and adapters
-# initialize model and tokenizer
-models, tokenizers = {}, {}
-for name, path in model_config.items():
-    model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
-    tokenizers[name] = T5Tokenizer.from_pretrained(path)
-    models[name] = model
-    print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")
+# can only use GPU 0 when using unsloth FastLanguageModel
+max_seq_length = 2048  # any value can be chosen, since RoPE scaling is used
+dtype = None  # None for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
+load_in_4bit = True  # use 4-bit quantization to reduce memory usage
+dec_only = "polygraf-ai/phi-3-mini-rank-128"
+dec_only_model, dec_only_tokenizer = FastLanguageModel.from_pretrained(
+    model_name=dec_only,
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit,
+    device_map="cuda:0",
+)
+FastLanguageModel.for_inference(dec_only_model)  # native 2x faster inference
+print(f"Loaded model: {dec_only}, Num. params: {dec_only_model.num_parameters()}")
 
 
-def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
+def humanize_batch_seq2seq(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
     inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
     inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
     outputs = model.generate(
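
Note: a quick smoke test of the seq2seq path loaded above might look like this; the sample sentence and sampling settings are illustrative, not part of the commit:

# Assumes seq2seq_model / seq2seq_tokenizer / device from the module above
prompt = "Please paraphrase this sentence: The results of the study were significant."
encoded = seq2seq_tokenizer(prompt, return_tensors="pt").to(device)
generated = seq2seq_model.generate(**encoded, do_sample=True, temperature=1.2, top_k=50, max_new_tokens=128)
print(seq2seq_tokenizer.decode(generated[0], skip_special_tokens=True))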
@@ -55,10 +65,61 @@ def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_pe
     return answers
 
 
-def paraphrase_text(
+def humanize_batch_decoder_only(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
+    pre_prompt = "As a humanizer model, your task is to rewrite the following sentence to make it more human-like. Return only the paraphrased sentence. \n\n"
+    # Construct the messages batch, one message per sentence
+    messages_batch = [{"from": "human", "value": f"{pre_prompt}{sentence}"} for sentence in sentences]
+    # Initialize the tokenizer with the chat template
+    tokenizer = get_chat_template(
+        tokenizer,
+        chat_template="phi-3",
+        mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},  # ShareGPT style
+    )
+
+    # Enable native 2x faster inference
+    FastLanguageModel.for_inference(model)
+    # Initialize an empty list to store responses
+    responses = []
+    # Process each message individually
+    for message in messages_batch:
+        # Apply the chat template to the individual message
+        inputs = tokenizer.apply_chat_template(
+            [message],  # Wrap the message in a list
+            tokenize=True,
+            add_generation_prompt=True,  # Must add for generation
+            return_tensors="pt",
+        ).to("cuda")
+        # Generate the response for the individual message
+        outputs = model.generate(
+            input_ids=inputs,
+            max_new_tokens=1024,
+            use_cache=True,
+            do_sample=True,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+            top_k=top_k,
+            length_penalty=length_penalty,
+        )
+        # Decode the output and store it
+        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=False)
+        responses.append(decoded_output[0])
+
+    # Extract the assistant's reply from each decoded response
+    generated_sentences = []
+    for idx, response in enumerate(responses):
+        generated_sentence = response.split("<|assistant|>")[1].split("<|end|>")[0].strip()
+        generated_sentences.append(generated_sentence)
+        print(sentences[idx])
+        print(generated_sentence)
+        print()
+
+    return generated_sentences
+
+
+def humanize_text(
     text,
     progress=gr.Progress(),
-    model_name="Base Model",
+    model_name="Standard Model",
     temperature=1.2,
     repetition_penalty=1.0,
     top_k=50,
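
Note: calling the new batch function directly is the easiest way to verify the chat-template round trip and the <|assistant|>/<|end|> parsing; a hedged usage sketch (the input sentence is illustrative):

# Assumes dec_only_model / dec_only_tokenizer were loaded above
sentences = ["Furthermore, the methodology employed herein ensures reproducibility."]
rewritten = humanize_batch_decoder_only(
    dec_only_model,
    dec_only_tokenizer,
    sentences,
    temperature=1.0,
    repetition_penalty=1.0,
    top_k=40,
    length_penalty=1.0,
)
print(rewritten)  # one paraphrase per input sentence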
@@ -69,15 +130,16 @@ def paraphrase_text(
     Paragraphs are stored as a number of sentences per paragraph.
     """
     progress(0, desc="Starting to Humanize")
-    # Select the model, tokenizer, and adapter
-    tokenizer = tokenizers[model_name]
-    model = models[model_name].to(device)
+
+    # Map model names to their respective processing functions
+    model_map = {"Standard Model": humanize_batch_seq2seq, "Advanced Model (Beta)": humanize_batch_decoder_only}
+    assert model_name in model_map, f"Invalid model name: {model_name}"
+    process_function = model_map[model_name]
 
     # Split the text into paragraphs and then into sentences
     paragraphs = text.split("\n")
     all_sentences = []
     sentences_per_paragraph = []
-
     for paragraph in paragraphs:
         sentences = sent_tokenize(paragraph)
         sentences_per_paragraph.append(len(sentences))
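
Note: with the dispatch table in place, app.py's humanize() reduces to a single humanize_text call. A sketch of that call as it would run from the Gradio click handler (sample text and settings are illustrative):

# Runs inside a Gradio event handler, where the default gr.Progress() is active
result = humanize_text(
    text="First paragraph.\nSecond paragraph. It has two sentences.",
    model_name="Advanced Model (Beta)",  # or "Standard Model" for the T5 path
    temperature=1.0,
    repetition_penalty=1.0,
    top_k=40,
    length_penalty=1.0,
)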
@@ -85,16 +147,39 @@ def paraphrase_text(
 
     # Process all sentences in batches
     paraphrased_sentences = []
-    for i in progress.tqdm(range(0, len(all_sentences), batch_size)):
-        batch_sentences = all_sentences[i : i + batch_size]
-        paraphrased_batch = paraphrase_sentences(
-            model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
-        )
-        paraphrased_sentences.extend(paraphrased_batch)
+    current_batch_size = BATCH_SIZE
+    i = 0
+
+    while i < len(all_sentences):
+        try:
+            batch_sentences = all_sentences[i : i + current_batch_size]
+
+            # Call the selected processing function
+            paraphrased_batch = process_function(
+                seq2seq_model if model_name == "Standard Model" else dec_only_model,
+                seq2seq_tokenizer if model_name == "Standard Model" else dec_only_tokenizer,
+                batch_sentences,
+                temperature,
+                repetition_penalty,
+                top_k,
+                length_penalty,
+            )
+
+            paraphrased_sentences.extend(paraphrased_batch)
+            i += current_batch_size  # Move to the next batch
+            torch.cuda.empty_cache()
+            gc.collect()
+            progress.update(i / len(all_sentences))
 
-    # Clear memory
-    torch.cuda.empty_cache()
-    gc.collect()
+        except RuntimeError as e:
+            if "out of memory" in str(e):
+                # Reduce the batch size by half and retry
+                current_batch_size = max(1, current_batch_size // 2)
+                print(f"Out of memory, reducing batch size to {current_batch_size}. Retrying...")
+                torch.cuda.empty_cache()
+                gc.collect()
+            else:
+                raise e
 
     # Reconstruct paragraphs
     humanized_paragraphs = []
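
Note: the halve-on-OOM retry loop above is a reusable pattern. A generic standalone sketch of the same idea, with a hypothetical process_batch callable and an extra guard so a batch size of 1 cannot retry forever:

import gc
import torch

def map_in_adaptive_batches(items, process_batch, batch_size=64):
    """Apply process_batch to items in batches, halving the batch size on CUDA OOM."""
    results, i = [], 0
    while i < len(items):
        try:
            results.extend(process_batch(items[i : i + batch_size]))
            i += batch_size
        except RuntimeError as e:
            if "out of memory" not in str(e) or batch_size == 1:
                raise  # not an OOM error, or nothing left to shrink
            batch_size = max(1, batch_size // 2)
            torch.cuda.empty_cache()
            gc.collect()
    return results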
 