Spaces: Runtime error

sync with humanize.py from main
Browse files

humanize.py  (+102 -851)  CHANGED
@@ -1,864 +1,115 @@
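This sync strips humanize.py down from a full copy of the Polygraf content-writer app (prompt builders, AI detectors, and the Gradio interface, presumably now provided elsewhere in the Space) to just the humanizer itself: checkpoint loading for the three poly-humanizer models, a batched sentence paraphraser, and the paraphrase_text() entry point. The removed version is shown first, then the replacement.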
- 
- nohup python3 app.py &
- """
- 
- import re
- import requests
- from typing import Dict
- from collections import defaultdict
- from datetime import date
- import gradio as gr
- from scipy.special import softmax
- import language_tool_python
- import nltk
  import torch
- from
- [old lines 16-18 blank or truncated in this view]
- from
- from
- [old lines 21-35 truncated in this view; whatever they set up closes with the "}" kept as context below]
  }
  
- #
- [old lines 39-47 truncated in this view; given the call in predict() below, this was most likely remove_special_characters(), ending with:]
-     return text
-
-
- def remove_bracketed_numbers(text):
-     pattern = r"^\[\d+\]"
-     cleaned_text = re.sub(pattern, "", text)
-     return cleaned_text
-
-
- def clean_text(text: str) -> str:
-     paragraphs = text.split("\n\n")
-     cleaned_paragraphs = []
-     for paragraph in paragraphs:
-         cleaned = re.sub(r"\s+", " ", paragraph).strip()
-         cleaned = re.sub(r"(?<=\.) ([a-z])", lambda x: x.group(1).upper(), cleaned)
-         cleaned_paragraphs.append(cleaned)
-     return "\n".join(cleaned_paragraphs)
-
-
- def split_text_from_refs(text: str, sep="\n"):
-     lines = text.split("\n")
-     references = []
-     article_text = []
-     index_pattern = re.compile(r"\[(\d+)\]")
-     in_references = False
-
-     for line in lines:
-         if line.strip().lower() == "references" or line.strip().lower() == "references:":
-             in_references = True
-             continue
-         if line.strip().lower().startswith("references:"):
-             in_references = True
-         if in_references:
-             matches = index_pattern.split(line)
-             for match in matches:
-                 if match.strip() and not match.isdigit() and not match.strip().lower().startswith("references:"):
-                     references.append(match.strip())
-         else:
-             article_text.append(line)
-
-     formatted_refs = []
-     for i, ref in enumerate(references, 1):
-         ref = remove_bracketed_numbers(ref)
-         formatted_refs.append(f"[{i}] {ref}{sep}")
-
-     return "\n\n".join(article_text), f"{sep}{sep}References:{sep}" + f"{sep}".join(formatted_refs)
-
-
- def ends_with_references(text):
-     # Define a regular expression pattern for variations of "References:"
-     pattern = re.compile(r"\b[Rr]eferences:\s*$", re.IGNORECASE | re.MULTILINE)
-
-     # Check if the text ends with any form of "References:"
-     return bool(pattern.search(text.strip()))
-
-
- def format_and_correct_language_check(text: str) -> str:
-     return tool.correct(text)
-
-
- def predict(model, tokenizer, text):
-     text = remove_special_characters(text)
-     bc_token_size = 256
-     with torch.no_grad():
-         model.eval()
-         tokens = tokenizer(
-             text,
-             padding="max_length",
-             truncation=True,
-             max_length=bc_token_size,
-             return_tensors="pt",
-         ).to(device)
-         output = model(**tokens)
-         output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
-         output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
-     return output_norm
-
-
- def ai_generated_test(text, model="BC Original"):
-     return predict(models[model], tokenizers[model], text)
-
-
- def detection_polygraf(text, model="BC Original"):
-     # sentences = split_into_sentences(text)
-     sentences = nltk.sent_tokenize(text)
-     num_sentences = len(sentences)
-     scores = defaultdict(list)
-
-     overall_scores = []
-
-     # Process each chunk of 3 sentences and store the score for each sentence in the chunk
-     for i in range(num_sentences):
-         chunk = " ".join(sentences[i : i + 3])
-         if chunk:
-             # result = classifier(chunk)
-             result = ai_generated_test(chunk, model)
-             score = result["AI"]
-             for j in range(i, min(i + 3, num_sentences)):
-                 scores[j].append(score)
-
-     # Calculate the average score for each sentence and apply color coding
-     paragraphs = text.split("\n")
-     paragraphs = [s for s in paragraphs if s.strip()]
-     colored_paragraphs = []
-     i = 0
-     for paragraph in paragraphs:
-         temp_sentences = nltk.sent_tokenize(paragraph)
-         colored_sentences = []
-         for sentence in temp_sentences:
-             if scores[i]:
-                 avg_score = sum(scores[i]) / len(scores[i])
-                 if avg_score >= 0.65:
-                     colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
-                 else:
-                     colored_sentence = sentence
-                 colored_sentences.append(colored_sentence)
-                 overall_scores.append(avg_score)
-                 i = i + 1
-         combined_sentences = " ".join(colored_sentences)
-         colored_paragraphs.append(combined_sentences)
-
-     overall_score = sum(overall_scores) / len(overall_scores)
-     overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
-     return overall_score, "<br><br>".join(colored_paragraphs)
-
-
- ai_check_options = [
-     "Polygraf AI (Base Model)",
-     "Polygraf AI (Advanced Model)",
- ]
-
-
- def ai_generated_test_sapling(text: str) -> Dict:
-     response = requests.post(
-         "https://api.sapling.ai/api/v1/aidetect", json={"key": "60L9BPSVPIIOEZM0CD1DQWRBPJIUR7SB", "text": f"{text}"}
-     )
-     return {"AI": response.json()["score"], "HUMAN": 1 - response.json()["score"]}
-
-
- class GPT2PPL:
-     def __init__(self):
-         self.device = device
-         self.model = to_device(GPT2LMHeadModel.from_pretrained("gpt2"))
-         self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-
-     def __call__(self, text):
-         encodings = self.tokenizer(text, return_tensors="pt")
-         encodings = {k: v.to(self.device) for k, v in encodings.items()}
-         max_length = self.model.config.n_positions
-         stride = 512
-         seq_len = encodings.input_ids.size(1)
-
-         nlls = []
-         for i in range(0, seq_len, stride):
-             begin_loc = max(i + stride - max_length, 0)
-             end_loc = min(i + stride, seq_len)
-             trg_len = end_loc - i
-             input_ids = encodings.input_ids[:, begin_loc:end_loc].to(self.device)
-             target_ids = input_ids.clone()
-             target_ids[:, :-trg_len] = -100
-
-             with torch.no_grad():
-                 outputs = self.model(input_ids, labels=target_ids)
-                 neg_log_likelihood = outputs.loss * trg_len
-
-             nlls.append(neg_log_likelihood)
-
-         ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
-         return {"AI": float(ppl), "HUMAN": 1 - float(ppl)}
-
-
- def ai_generated_test_gptzero(text):
-     gptzero_model = GPT2PPL()
-     result = gptzero_model(text)
-     return result, None
-
-
- def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
-     body, references = split_text_from_refs(text, "<br>")
-     score, text = detection_polygraf(text=body, model=model)
-     text = text + "<br>" + references
-     return score, text
-
-
- def ai_check(text: str, option: str):
-     if option.startswith("Polygraf AI"):
-         return highlighter_polygraf(text, option)
-     elif option == "Sapling AI":
-         return ai_generated_test_sapling(text)
-     elif option == "GPTZero":
-         return ai_generated_test_gptzero(text)
      else:
- [old lines 240-252 truncated in this view: the else branch and, judging by the template below and the call in generate_article(), the start of generate_prompt(settings)]
-     Content:
-     - Depth: {settings['depth_of_content']}
-     - Structure: {', '.join(settings['structure'])}
-
-     Keywords to incorporate:
-     {', '.join(settings['keywords'])}
-
-     Additional requirements:
-     - Don't start with "Here is a...", start with the requested text directly
-     - Include {settings['num_examples']} relevant examples or case studies
-     - Incorporate data or statistics from {', '.join(settings['references'])}
-     - End with a {settings['conclusion_type']} conclusion
-     - Add a "References" section in the format "References:\n" at the end with at least 3 credible sources, formatted as [1], [2], etc. with each source on their own line
-     - Do not make any headline, title bold.
-     {settings['sources']}
-
-     Ensure proper paragraph breaks for better readability.
-     Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
-     """
-     return prompt
-
-
- def regenerate_prompt(settings: Dict[str, str]) -> str:
-     prompt = f"""
-     I am a {settings['role']}
-     "{settings['generated_article']}"
-     Edit the given text based on user comments.
-
-     Comments:
-     - Don't start with "Here is a...", start with the requested text directly
-     - {settings['user_comments']}
-     - The original content should not be changed. Make minor modifications based on user comments above.
-     - Keep the references the same as the given text in the same format.
-     - Do not make any headline, title bold.
-     {settings['sources']}
-
-     Ensure proper paragraph breaks for better readability.
-     Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
-     """
-     return prompt
-
-
- def generate_article(
-     input_role: str,
-     topic: str,
-     keywords: str,
-     article_length: str,
-     format: str,
-     writing_style: str,
-     tone: str,
-     user_category: str,
-     depth_of_content: str,
-     structure: str,
-     references: str,
-     num_examples: str,
-     conclusion_type: str,
-     ai_model: str,
-     content_string: str,
-     # api_key: str = None,
-     pdf_file_input=None,
-     generated_article: str = None,
-     user_comments: str = None,
- ) -> str:
-     settings = {
-         "role": input_role,
-         "topic": topic,
-         "keywords": [k.strip() for k in keywords.split(",")],
-         "article_length": article_length,
-         "format": format,
-         "writing_style": writing_style,
-         "tone": tone,
-         "user_category": user_category,
-         "depth_of_content": depth_of_content,
-         "structure": [s.strip() for s in structure.split(",")],
-         "references": [r.strip() for r in references.split(",")],
-         "num_examples": num_examples,
-         "conclusion_type": conclusion_type,
-         "sources": content_string,
-         "generated_article": generated_article,
-         "user_comments": user_comments,
-     }
-
-     if generated_article:
-         prompt = regenerate_prompt(settings)
-     else:
-         prompt = generate_prompt(settings)
-
-     print("Generated Prompt...\n", prompt)
-     article = generate(
-         prompt,
-         ai_model,
-         pdf_file_input,  # api_key
-     )
-
-     return clean_text(article)
-
-
- def humanize(
-     text: str,
-     model: str,
-     temperature: float = 1.2,
-     repetition_penalty: float = 1,
-     top_k: int = 50,
-     length_penalty: float = 1,
- ) -> str:
-     body, references = split_text_from_refs(text)
-     result = paraphrase_text(
-         text=body,
-         model_name=model,
          temperature=temperature,
          repetition_penalty=repetition_penalty,
          top_k=top_k,
          length_penalty=length_penalty,
      )
-
-     return   [old line 368 is cut off after "return" in this view]
-
-
- def   [old line 371 is cut off after "def"; lines 372-378 are truncated. The body below and the call at the end of generate_and_format() indicate this is format_references(text)]
-     lines = text.split("\n")
-     references = []
-     article_text = []
-     index_pattern = re.compile(r"\[(\d+)\]")
-     in_references = False
-
-     for line in lines:
-         if line.strip().lower() == "references" or line.strip().lower() == "references:":
-             in_references = True
-             continue
-         if line.strip().lower().startswith("references:"):
-             in_references = True
-         if in_references:
-             matches = index_pattern.split(line)
-             for match in matches:
-                 if match.strip() and not match.isdigit() and not match.strip().lower().startswith("references:"):
-                     references.append(match.strip())
-         else:
-             article_text.append(line)
-
-     formatted_refs = []
-     for i, ref in enumerate(references, 1):
-         ref = remove_bracketed_numbers(ref)
-         formatted_refs.append(f"[{i}] {ref}\n")
-
-     return "\n\n".join(article_text) + "\n\nReferences:\n" + "\n".join(formatted_refs)
-
-
- def generate_and_format(
-     input_role,
-     topic,
-     keywords,
-     article_length,
-     format,
-     writing_style,
-     tone,
-     user_category,
-     depth_of_content,
-     structure,
-     references,
-     num_examples,
-     conclusion_type,
-     ai_model,
-     # api_key,
-     google_search_check,
-     year_from,
-     month_from,
-     day_from,
-     year_to,
-     month_to,
-     day_to,
-     domains_to_include,
-     include_sites,
-     exclude_sites,
-     pdf_file_input,
-     generated_article: str = None,
-     user_comments: str = None,
  ):
- [old lines 437-444 truncated in this view: presumably the date-range handling and the initial construction of final_query, before the site filters that survive below]
-         site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
-         final_query += " " + " OR ".join(site_queries)
-         if exclude_sites:
-             exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
-             final_query += " " + " ".join(exclude_queries)
-         print(f"Google Search Query: {final_query}")
-         url_content = google_search(final_query, sorted_date, domains_to_include)
-         content_string = "\n".join(
-             f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in url_content.items()
-         )
-         content_string = (
-             "Use the trusted information here from the URLs and add them as References:\n" + content_string
-         )
-     article = generate_article(
-         input_role,
-         topic,
-         keywords,
-         article_length,
-         format,
-         writing_style,
-         tone,
-         user_category,
-         depth_of_content,
-         structure,
-         references,
-         num_examples,
-         conclusion_type,
-         ai_model,
-         content_string,
-         # api_key,
-         pdf_file_input,
-         generated_article,
-         user_comments,
-     )
-     if ends_with_references(article) and url_content is not None:
-         for url in url_content.keys():
-             article += f"\n{url}"
-
-     return format_references(article)
-
-
- def create_interface():
-     with gr.Blocks(
-         theme=gr.themes.Default(
-             primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.yellow, neutral_hue=gr.themes.colors.gray
-         ),
-         css="""
-         .input-highlight-pink block_label {background-color: #008080}
-         """,
-     ) as demo:
-         today = date.today()
-         # dd/mm/YY
-         d1 = today.strftime("%d/%B/%Y")
-         d1 = d1.split("/")
-         gr.Markdown("# Polygraf AI Content Writer", elem_classes="text-center text-3xl mb-6")
-
-         with gr.Row():
-             with gr.Column(scale=2):
-                 with gr.Group():
-                     gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
-                     input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
-                     input_topic = gr.Textbox(
-                         label="Topic",
-                         placeholder="Enter the main topic of your article",
-                         elem_classes="input-highlight-pink",
-                     )
-                     input_keywords = gr.Textbox(
-                         label="Keywords",
-                         placeholder="Enter comma-separated keywords",
-                         elem_classes="input-highlight-yellow",
-                     )
-
-                     with gr.Row():
-                         input_format = gr.Dropdown(
-                             choices=[
-                                 "Article",
-                                 "Essay",
-                                 "Blog post",
-                                 "Report",
-                                 "Research paper",
-                                 "News article",
-                                 "White paper",
-                                 "LinkedIn post",
-                                 "X (Twitter) post",
-                                 "Instagram Video Content",
-                                 "TikTok Video Content",
-                                 "Facebook post",
-                             ],
-                             value="Article",
-                             label="Format",
-                             elem_classes="input-highlight-turquoise",
-                         )
-
-                     input_length = gr.Slider(
-                         minimum=50,
-                         maximum=5000,
-                         step=50,
-                         value=300,
-                         label="Article Length",
-                         elem_classes="input-highlight-pink",
-                     )
-
-                     with gr.Row():
-                         input_writing_style = gr.Dropdown(
-                             choices=[
-                                 "Formal",
-                                 "Informal",
-                                 "Technical",
-                                 "Conversational",
-                                 "Journalistic",
-                                 "Academic",
-                                 "Creative",
-                             ],
-                             value="Formal",
-                             label="Writing Style",
-                             elem_classes="input-highlight-yellow",
-                         )
-                         input_tone = gr.Dropdown(
-                             choices=["Friendly", "Professional", "Neutral", "Enthusiastic", "Skeptical", "Humorous"],
-                             value="Professional",
-                             label="Tone",
-                             elem_classes="input-highlight-turquoise",
-                         )
-
-                     input_user_category = gr.Dropdown(
-                         choices=[
-                             "Students",
-                             "Professionals",
-                             "Researchers",
-                             "General Public",
-                             "Policymakers",
-                             "Entrepreneurs",
-                         ],
-                         value="General Public",
-                         label="Target Audience",
-                         elem_classes="input-highlight-pink",
-                     )
-                     input_depth = gr.Dropdown(
-                         choices=[
-                             "Surface-level overview",
-                             "Moderate analysis",
-                             "In-depth research",
-                             "Comprehensive study",
-                         ],
-                         value="Moderate analysis",
-                         label="Depth of Content",
-                         elem_classes="input-highlight-yellow",
-                     )
-                     input_structure = gr.Dropdown(
-                         choices=[
-                             "Introduction, Body, Conclusion",
-                             "Abstract, Introduction, Methods, Results, Discussion, Conclusion",
-                             "Executive Summary, Problem Statement, Analysis, Recommendations, Conclusion",
-                             "Introduction, Literature Review, Methodology, Findings, Analysis, Conclusion",
-                         ],
-                         value="Introduction, Body, Conclusion",
-                         label="Structure",
-                         elem_classes="input-highlight-turquoise",
-                     )
-                     input_references = gr.Dropdown(
-                         choices=[
-                             "Academic journals",
-                             "Industry reports",
-                             "Government publications",
-                             "News outlets",
-                             "Expert interviews",
-                             "Case studies",
-                         ],
-                         value="News outlets",
-                         label="References",
-                         elem_classes="input-highlight-pink",
-                     )
-                     input_num_examples = gr.Dropdown(
-                         choices=["1-2", "3-4", "5+"],
-                         value="1-2",
-                         label="Number of Examples/Case Studies",
-                         elem_classes="input-highlight-yellow",
-                     )
-                     input_conclusion = gr.Dropdown(
-                         choices=["Summary", "Call to Action", "Future Outlook", "Thought-provoking Question"],
-                         value="Call to Action",
-                         label="Conclusion Type",
-                         elem_classes="input-highlight-turquoise",
-                     )
-                 gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
-                 with gr.Row():
-                     google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=False)
-                 with gr.Group(visible=True) as search_options:
-                     with gr.Row():
-                         include_sites = gr.Textbox(
-                             label="Include Specific Websites",
-                             placeholder="Enter comma-separated keywords",
-                             elem_classes="input-highlight-yellow",
-                         )
-                     with gr.Row():
-                         exclude_sites = gr.Textbox(
-                             label="Exclude Specific Websites",
-                             placeholder="Enter comma-separated keywords",
-                             elem_classes="input-highlight-yellow",
-                         )
-                     with gr.Row():
-                         domains_to_include = gr.Dropdown(
-                             domain_list,
-                             value=domain_list,
-                             multiselect=True,
-                             label="Domains To Include",
-                         )
-                     with gr.Row():
-                         month_from = gr.Dropdown(
-                             choices=months,
-                             label="From Month",
-                             value="January",
-                             interactive=True,
-                         )
-                         day_from = gr.Textbox(label="From Day", value="01")
-                         year_from = gr.Textbox(label="From Year", value="2000")
-
-                     with gr.Row():
-                         month_to = gr.Dropdown(
-                             choices=months,
-                             label="To Month",
-                             value=d1[1],
-                             interactive=True,
-                         )
-                         day_to = gr.Textbox(label="To Day", value=d1[0])
-                         year_to = gr.Textbox(label="To Year", value=d1[2])
-
-                 gr.Markdown("# Add Optional PDF File with Information", elem_classes="text-center text-3xl mb-6")
-                 pdf_file_input = gr.File(label="Upload PDF")
-
-                 with gr.Group():
-                     gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
-                     ai_generator = gr.Dropdown(
-                         choices=[
-                             "OpenAI GPT 4",
-                             "OpenAI GPT 4o",
-                             "OpenAI GPT 4o Mini",
-                             "Claude Sonnet 3.5",
-                             "Gemini 1.5 Pro",
-                             "LLaMA 3",
-                         ],
-                         value="OpenAI GPT 4o Mini",
-                         label="AI Model",
-                         elem_classes="input-highlight-pink",
-                     )
-                     # input_api = gr.Textbox(label="API Key", visible=False)
-                     # ai_generator.change(update_visibility_api, ai_generator, input_api)
-
-                 generate_btn = gr.Button("Generate Article", variant="primary")
-
-                 with gr.Accordion("Advanced Humanizer Settings", open=False):
-                     with gr.Row():
-                         model_dropdown = gr.Radio(
-                             choices=[
-                                 "Base Model",
-                                 "Large Model",
-                                 "XL Model",
-                                 # "XL Law Model",
-                                 # "XL Marketing Model",
-                                 # "XL Child Style Model",
-                             ],
-                             value="Large Model",
-                             label="Humanizer Model Version",
-                         )
-                     with gr.Row():
-                         temperature_slider = gr.Slider(
-                             minimum=0.5, maximum=2.0, step=0.1, value=1.3, label="Temperature"
-                         )
-                         top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=50, label="Top k")
-                     with gr.Row():
-                         repetition_penalty_slider = gr.Slider(
-                             minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
-                         )
-                         length_penalty_slider = gr.Slider(
-                             minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
-                         )
-
-             with gr.Column(scale=3):
-                 output_article = gr.Textbox(label="Generated Article", lines=20)
-                 ai_comments = gr.Textbox(
-                     label="Add comments to help edit generated text", interactive=True, visible=False
-                 )
-                 regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=False)
-                 ai_detector_dropdown = gr.Radio(
-                     choices=ai_check_options, label="Select AI Detector", value="Polygraf AI"
-                 )
-                 ai_check_btn = gr.Button("AI Check")
-
-                 with gr.Accordion("AI Detection Results", open=True):
-                     ai_check_result = gr.Label(label="AI Check Result")
-                     highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
-                 humanize_btn = gr.Button("Humanize")
-                 # humanized_output = gr.Textbox(label="Humanized Article", lines=20, elem_classes=["custom-textbox"])
-                 humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
-                 copy_to_input_btn = gr.Button("Copy to Input for AI Check")
-
-         def regenerate_visible(text):
-             if text:
-                 return gr.update(visible=True)
-             else:
-                 return gr.update(visible=False)
-
-         def highlight_visible(text):
-             if text.startswith("Polygraf"):
-                 return gr.update(visible=True)
-             else:
-                 return gr.update(visible=False)
-
-         def search_visible(toggle):
-             if toggle:
-                 return gr.update(visible=True)
-             else:
-                 return gr.update(visible=False)
-
-         google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
-         ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
-         output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
-         ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
-         ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
-
-         generate_btn.click(
-             fn=generate_and_format,
-             inputs=[
-                 input_role,
-                 input_topic,
-                 input_keywords,
-                 input_length,
-                 input_format,
-                 input_writing_style,
-                 input_tone,
-                 input_user_category,
-                 input_depth,
-                 input_structure,
-                 input_references,
-                 input_num_examples,
-                 input_conclusion,
-                 ai_generator,
-                 # input_api,
-                 google_search_check,
-                 year_from,
-                 month_from,
-                 day_from,
-                 year_to,
-                 month_to,
-                 day_to,
-                 domains_to_include,
-                 include_sites,
-                 exclude_sites,
-                 pdf_file_input,
-             ],
-             outputs=[output_article],
-         )
-
-         regenerate_btn.click(
-             fn=generate_and_format,
-             inputs=[
-                 input_role,
-                 input_topic,
-                 input_keywords,
-                 input_length,
-                 input_format,
-                 input_writing_style,
-                 input_tone,
-                 input_user_category,
-                 input_depth,
-                 input_structure,
-                 input_references,
-                 input_num_examples,
-                 input_conclusion,
-                 ai_generator,
-                 # input_api,
-                 google_search_check,
-                 year_from,
-                 month_from,
-                 day_from,
-                 year_to,
-                 month_to,
-                 day_to,
-                 domains_to_include,
-                 pdf_file_input,
-                 output_article,
-                 include_sites,
-                 exclude_sites,
-                 ai_comments,
-             ],
-             outputs=[output_article],
-         )
-
-         ai_check_btn.click(
-             fn=ai_check,
-             inputs=[output_article, ai_detector_dropdown],
-             outputs=[ai_check_result, highlighted_text],
-         )
  
- [old lines 839-842 truncated in this view; from the argument list that survives below, this was almost certainly the humanize_btn.click(fn=..., inputs=[...]) wiring]
-                 model_dropdown,
-                 temperature_slider,
-                 repetition_penalty_slider,
-                 top_k_slider,
-                 length_penalty_slider,
-             ],
-             outputs=[humanized_output],
-         )
  
- [old lines 852-855 truncated in this view, presumably the copy_to_input_btn wiring]
  )
- [old lines 857-864 truncated in this view; only a stray "#" comment on line 863 survives]
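The replacement module follows. Its one non-obvious piece of bookkeeping: paraphrase_text() flattens the sentences of every paragraph into a single list so that generation can run in batches of batch_size = 64, while sentences_per_paragraph records how many sentences each paragraph contributed. For example, sentences_per_paragraph = [3, 2] means the first three paraphrased sentences are rejoined as paragraph one and the next two as paragraph two.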
+ import gc
  import torch
+ from nltk import sent_tokenize
+ import nltk
+ from tqdm import tqdm
+ import gradio as gr
+ from peft import PeftModel
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
+
+ nltk.download("punkt")
+ # autodetect the available device
+ GPU_IDX = 1  # which GPU to use
+ if torch.cuda.is_available():
+     num_gpus = torch.cuda.device_count()
+     print(f"Number of available GPUs: {num_gpus}")
+     assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
+     device = torch.device(f"cuda:{GPU_IDX}")
+     print(f"Using GPU: {GPU_IDX}")
+ else:
+     print("CUDA is not available. Using CPU instead.")
+     device = torch.device("cpu")
+
+ batch_size = 64
+
+ # Configuration for models and their adapters
+ model_config = {
+     "Base Model": "polygraf-ai/poly-humanizer-base",
+     "Large Model": "polygraf-ai/poly-humanizer-large",
+     "XL Model": "polygraf-ai/poly-humanizer-XL-adapter",
  }
  
+ # cache the base models, tokenizers, and adapters
+ # initialize model and tokenizer
+ models, tokenizers = {}, {}
+ for name, path in model_config.items():
+     if name == "XL Model":
+         model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", torch_dtype=torch.bfloat16).to(device)
+         model = PeftModel.from_pretrained(model, path, torch_dtype=torch.bfloat16, is_trainable=False)
+         model = model.merge_and_unload()
+         models[name] = model
+         tokenizers[name] = T5Tokenizer.from_pretrained("google/flan-t5-xl")
      else:
+         model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
+         models[name] = model
+         tokenizers[name] = T5Tokenizer.from_pretrained(path)
+     print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")
+
+
+ def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
+     inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
+     inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
+     outputs = model.generate(
+         **inputs,
+         do_sample=True,
          temperature=temperature,
          repetition_penalty=repetition_penalty,
+         max_length=128,
          top_k=top_k,
          length_penalty=length_penalty,
      )
+     answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
+     return answers
+
+
+ def paraphrase_text(
+     text,
+     progress=gr.Progress(),
+     model_name="Base Model",
+     temperature=1.2,
+     repetition_penalty=1.0,
+     top_k=50,
+     length_penalty=1.0,
  ):
+     """
+     Optimization here is to feed all sentences at once to the model.
+     Paragraphs are stored as a number of sentences per paragraph.
+     """
+     progress(0, desc="Starting to Humanize")
+     # Select the model, tokenizer, and adapter
+     tokenizer = tokenizers[model_name]
+     model = models[model_name].to(device)
  
+     # Split the text into paragraphs and then into sentences
+     paragraphs = text.split("\n")
+     all_sentences = []
+     sentences_per_paragraph = []
  
+     for paragraph in paragraphs:
+         sentences = sent_tokenize(paragraph)
+         sentences_per_paragraph.append(len(sentences))
+         all_sentences.extend(sentences)
+
+     # Process all sentences in batches
+     paraphrased_sentences = []
+     for i in progress.tqdm(range(0, len(all_sentences), batch_size)):
+         batch_sentences = all_sentences[i : i + batch_size]
+         paraphrased_batch = paraphrase_sentences(
+             model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
          )
+         paraphrased_sentences.extend(paraphrased_batch)
+
+     # Clear memory
+     torch.cuda.empty_cache()
+     gc.collect()
+
+     # Reconstruct paragraphs
+     humanized_paragraphs = []
+     sentence_index = 0
+     for num_sentences in sentences_per_paragraph:
+         humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
+         humanized_paragraphs.append(humanized_paragraph)
+         sentence_index += num_sentences
+
+     humanized_text = "\n".join(humanized_paragraphs)
+     return humanized_text
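
Not part of the commit, for orientation only: a minimal sketch of how the new entry point can be driven from a Gradio app. Gradio injects a tracker for the gr.Progress() default when paraphrase_text is used as an event handler, so the two inputs below map to text and model_name. The module path (humanize) and the component names are assumptions, not code from this repo.

    # sketch.py: assumes the new file above is saved as humanize.py and that the
    # polygraf-ai checkpoints are downloadable; the models load at import time.
    import gradio as gr
    from humanize import paraphrase_text

    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Text to humanize", lines=8)
        model = gr.Radio(
            ["Base Model", "Large Model", "XL Model"],
            value="Base Model",
            label="Humanizer Model Version",
        )
        btn = gr.Button("Humanize")
        out = gr.Markdown()

        # The remaining generation knobs keep their defaults
        # (temperature=1.2, repetition_penalty=1.0, top_k=50, length_penalty=1.0).
        btn.click(fn=paraphrase_text, inputs=[inp, model], outputs=[out])

    demo.launch()

The removed app exposed those same knobs as sliders and routed them through its humanize() wrapper; wiring them up here would just mean appending the slider components to inputs in the same order as paraphrase_text's parameters.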