Spaces:

polygraf-ai
/

article_writer

Runtime error

File size: 48,460 Bytes

"""
nohup python3 app.py &
export GOOGLE_APPLICATION_CREDENTIALS="gcp_creds.json"
"""

import gc
import re
import uuid
import json
from typing import Dict
from collections import defaultdict
from datetime import date, datetime

import nltk
import torch
import numpy as np
import gradio as gr
import language_tool_python
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from google.cloud import storage

# One-time heavyweight setup: project-local imports plus loading of every model,
# tokenizer and the grammar tool used by the functions below.
# NOTE(review): guarded by gr.NO_RELOAD, presumably so Gradio's reload mode does
# not re-run this expensive section on each source change — confirm against the
# Gradio reload-mode documentation.
if gr.NO_RELOAD:
    from humanize import humanize_text, device

    # humanize_text = None
    # device = None
    from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
    from google_search import google_search, months, domain_list, build_date
    from ai_generate import generate, citations_to_html, remove_citations, display_cited_text, llm_wrapper
    from youtube import transcribe

    # nltk.download("punkt_tab")

    print(f"Using device: {device}")
    print("Loading AI detection models...")
    # Binary (human vs. AI) classifiers, keyed by the display name used to select them.
    models = {
        "Polygraf AI (Base Model)": AutoModelForSequenceClassification.from_pretrained(
            "polygraf-ai/bc-roberta-openai-2sent"
        ).to(device),
        "Polygraf AI (Advanced Model)": AutoModelForSequenceClassification.from_pretrained(
            "polygraf-ai/bc_combined_3sent"
        ).to(device),
    }
    # Tokenizers matching `models`; the keys must stay identical to the keys above
    # because both dicts are indexed with the same model name.
    tokenizers = {
        "Polygraf AI (Base Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc-roberta-openai-2sent"),
        "Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
    }

    # grammar correction tool
    tool = language_tool_python.LanguageTool("en-US")

    # source detection model
    MC_TOKEN_SIZE = 256  # max_length passed to the source-detection tokenizer
    TEXT_MC_MODEL_PATH = "polygraf-ai/mc-model"
    # Labels are zipped positionally with the source-detection model's averaged
    # output scores, so their order matters.
    MC_LABEL_MAP = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "Grammar Enhancer"]
    text_mc_tokenizer = AutoTokenizer.from_pretrained(TEXT_MC_MODEL_PATH)
    print("Loading Source detection model...")
    text_mc_model = AutoModelForSequenceClassification.from_pretrained(TEXT_MC_MODEL_PATH).to(device)


def generate_cited_html(cited_text, citations: dict):
    """Render text containing ``<N>`` citation markers as an HTML fragment.

    Every ``<N>`` marker whose integer ``N`` is a key of ``citations`` is
    replaced by a numbered button with a pop-up showing the source and content
    of that citation. Display numbers are assigned in order of first
    appearance. A "References" list with one entry per distinct citation is
    appended after the text. Markers with no matching citation are left as-is.

    Args:
        cited_text: Text with inline ``<N>`` markers; newlines become ``<br>``.
        citations: Mapping from integer citation id to a dict with ``"source"``
            and ``"content"`` string entries. It is not modified.

    Returns:
        A string holding the ``<style>``/``<script>`` preamble, the converted
        text and the references list, wrapped in a scrollable ``<div>``.

    NOTE(review): source and content are interpolated into HTML without
    escaping; if they can hold untrusted markup, escape them before rendering.
    """
    cited_text = cited_text.replace("\n", "<br>")
    html_code = """
    <style>
    .reference-container {
        position: relative;
        display: inline-block;
    }
    .reference-btn {
        display: inline-block;
        width: 20px; /* Reduced width */
        height: 20px; /* Reduced height */
        border-radius: 50%;
        background-color: #e33a89; /* Pink color for the button */
        color: white;
        text-align: center;
        line-height: 20px; /* Adjusted line-height */
        cursor: pointer;
        font-weight: bold;
        margin-right: 5px;
        transition: background-color 0.3s ease, transform 0.3s ease;
    }
    .reference-btn:hover {
        background-color: #ff69b4; /* Lighter pink on hover */
        transform: scale(1.1); /* Slightly enlarge on hover */
    }
    .reference-popup {
        display: none;
        position: absolute;
        z-index: 1;
        top: 100%;
        background-color: #f9f9f9;
        border: 1px solid #ddd;
        padding: 15px;
        border-radius: 4px;
        box-shadow: 0 2px 5px rgba(0,0,0,0.2);
        width: calc(min(90vw, 400px));
        max-height: calc(min(80vh, 300px));
        overflow-y: auto;
    }
    .reference-popup .close-btn {
        float: right;
        cursor: pointer;
        font-weight: bold;
        color: white;
        font-size: 16px;
        padding: 0;
        width: 20px;
        height: 20px;
        text-align: center;
        line-height: 20px;
        background-color: #ff4c4c;
        border-radius: 2px;
        transition: transform 0.3s ease, background-color 0.3s ease;
    }
    .reference-popup .close-btn:hover {
        transform: scale(1.2);
        background-color: #ff3333;
    }
    input[type="radio"] {
        position: absolute;
        opacity: 0;
        pointer-events: none;
    }
    input[type="radio"]:checked + .reference-popup {
        display: block;
    }

    /* Additional styling for distinct sections */
    .reference-popup strong {
        font-weight: bold;
        color: #333;
        display: block;
        margin-bottom: 5px;
    }
    .reference-popup p {
        margin: 0 0 10px 0;
        padding: 0;
    }
    .reference-popup .source {
        margin-bottom: 10px;
        font-size: 14px;
        font-weight: bold;
        color: #1e90ff;
    }
    .reference-popup .content {
        margin-bottom: 10px;
        font-size: 13px;
        color: #555;
    }

    @media (prefers-color-scheme: dark) {
        .reference-btn {
            background-color: #1e90ff;
        }
        .reference-popup {
            background-color: #2c2c2c;
            border-color: #444;
            color: #f1f1f1;
        }
        .reference-popup .close-btn {
            background-color: #ff4c4c;
        }
        .reference-popup .close-btn:hover {
            background-color: #ff3333;
        }
        .reference-popup strong {
            color: #ddd;
        }
        .reference-popup .source {
            color: #1e90ff;
        }
        .reference-popup .content {
            color: #bbb;
        }
    }
    </style>
    <script>
    document.addEventListener('click', (event) => {
        const containers = document.querySelectorAll('.reference-container');
        containers.forEach(container => {
            const rect = container.getBoundingClientRect();
            const popup = container.querySelector('.reference-popup');

            // Reset alignment (including any centering transform from a previous click)
            popup.style.left = '';
            popup.style.right = '';
            popup.style.transform = '';

            const popupWidth = popup.offsetWidth;
            const viewportWidth = window.innerWidth;

            // If the popup would go off the right edge
            if (rect.right + popupWidth > viewportWidth) {
                popup.style.right = '0';  // Align popup to the right
            }
            // If the popup would go off the left edge
            else if (rect.left - popupWidth < 0) {
                popup.style.left = '0';  // Align popup to the left
            }
            // Otherwise center it
            else {
                popup.style.left = '50%';
                popup.style.transform = 'translateX(-50%)'; // Center the popup
            }
        });
    });

    function closeReferencePanes() {
        document.querySelectorAll('input[name="reference"]').forEach((input) => {
            input.checked = false;
        });
    }
    </script>
    <div style="height: 600px; overflow-y: auto; overflow-x: auto;">
    """

    # Maps citation id (as matched, a string) to its display number.
    citation_numbers = {}
    # Counts every rendered button so each one gets a unique element id.
    citation_count = 0
    reference_entries = ["<b>References:</b><br><br>"]

    def replace_citations(match):
        """Build the button + pop-up HTML for one ``<N>`` marker."""
        nonlocal citation_count
        citation_id = match.group(1)  # Extract citation number from the match
        ref_data = citations.get(int(citation_id))

        # If reference data is not found, return the original text
        if not ref_data:
            return match.group(0)

        # Work on local copies so the caller's citation dicts stay untouched.
        source = ref_data["source"]
        # Uploaded files live under a gradio temp path; show only the file name.
        if "/var/tmp/gradio/" in source:
            source = source.split("/")[-1]

        # remove new line artifacts from scraping / parsing
        content = ref_data["content"].replace("\n", " ")

        # Check if source is a URL, make it clickable if so
        if source.startswith("http"):
            source_html = f'<a href="{source}" target="_blank" class="source">{source}</a>'
        else:
            source_html = f'<span class="source">{source}</span>'

        # First appearance of this citation: assign the next display number
        # and add it to the references list.
        if citation_id not in citation_numbers:
            next_number = len(citation_numbers) + 1
            citation_numbers[citation_id] = next_number
            reference_entries.append(f"[{next_number}] {source}<br>-     {content}<br><br>")
        citation_number = citation_numbers[citation_id]

        # Unique id for each reference button and popup
        unique_id = f"{citation_id}-{citation_count}"
        citation_count += 1

        # HTML code for the reference button and popup with formatted content
        button_html = f"""
        <span class="reference-container">
        <label for="ref-toggle-{unique_id}" class="reference-btn" onclick="closeReferencePanes(); document.getElementById('ref-toggle-{unique_id}').checked = true;">{citation_number}</label>
        <input type="radio" id="ref-toggle-{unique_id}" name="reference" />
        <span class="reference-popup">
            <span class="close-btn" onclick="document.getElementById('ref-toggle-{unique_id}').checked = false;">&times;</span>
            <strong>Source:</strong> {source_html}
            <strong>Content:</strong> <p class="content">{content}</p>
        </span>
        </span>
        """
        return button_html

    # Replace inline citations first: this is what fills `reference_entries`.
    body_html = re.sub(r"<(\d+)>", replace_citations, cited_text)
    return html_code + body_html + "<br><br>" + "".join(reference_entries) + "</div>"


def to_device(model):
    """Move ``model`` to the module-level ``device`` and return the result of ``.to``."""
    moved_model = model.to(device)
    return moved_model


def copy_to_input(text):
    """Return ``text`` unchanged (identity pass-through)."""
    passthrough = text
    return passthrough


def remove_bracketed_numbers(text):
    """Strip one ``[N]`` marker from the very start of ``text``, if present.

    Only a marker at position 0 is removed; markers elsewhere are kept.
    """
    leading_marker = re.compile(r"^\[\d+\]")
    return leading_marker.sub("", text)


def clean_text(text: str) -> str:
    """Normalise whitespace and sentence capitalisation, paragraph by paragraph.

    Paragraphs are the pieces of ``text`` separated by a blank line
    (``"\\n\\n"``). Inside each paragraph every whitespace run collapses to a
    single space, and a lowercase letter that follows ``". "`` is upper-cased.
    Paragraphs that end up empty are dropped.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned paragraphs joined with ``"\\n\\n"``.
    """
    cleaned_paragraphs = []
    for paragraph in text.split("\n\n"):
        cleaned = re.sub(r"\s+", " ", paragraph).strip()
        if not cleaned:
            continue
        # The space sits inside the look-behind so it is kept; matching it as
        # part of the replaced span would glue sentences together ("a.B").
        cleaned = re.sub(r"(?<=\. )([a-z])", lambda m: m.group(1).upper(), cleaned)
        cleaned_paragraphs.append(cleaned)
    return "\n\n".join(cleaned_paragraphs)


def format_references(text: str) -> str:
    """Return ``text`` with its body and references re-joined after normalisation.

    Delegates the splitting and reference renumbering to
    ``split_text_from_refs`` and concatenates the two parts it returns.
    """
    article_body, reference_section = split_text_from_refs(text)
    return f"{article_body}{reference_section}"


def split_text_from_refs(text: str, sep="\n"):
    """Split an article into its body and a renumbered references section.

    Lines are read in order. Everything before the first "References" heading
    is body text. From the heading onwards, each line is broken on ``[N]``
    markers and every non-empty, non-numeric piece becomes one reference.

    Args:
        text: Article text, possibly ending with a references section.
        sep: Separator used when re-joining body lines and reference entries.

    Returns:
        ``(body, formatted_refs)``. ``body`` is the stripped body lines joined
        by two separators. ``formatted_refs`` is an empty string when no
        reference was found, otherwise a "References:" heading followed by the
        references renumbered from 1.
    """
    number_marker = re.compile(r"\[(\d+)\]")
    heading_pattern = re.compile(r"[Rr]eferences:")
    body_lines = []
    reference_entries = []
    seen_heading = False

    for raw_line in text.split("\n"):
        if raw_line == "":
            continue
        lowered = raw_line.strip().lower()
        # A line that is nothing but the heading only switches mode.
        if lowered in ("references", "references:"):
            seen_heading = True
            continue
        if lowered.startswith("references:"):
            seen_heading = True
        heading_hit = heading_pattern.search(raw_line)
        if heading_hit:
            # Heading embedded in a longer line: keep only what follows it.
            seen_heading = True
            raw_line = raw_line[heading_hit.end() :]
        if not seen_heading:
            body_lines.append(raw_line.strip())
            continue
        # The capture group makes split() also yield the bare numbers;
        # those are dropped by the isdigit() check below.
        for piece in number_marker.split(raw_line):
            trimmed = piece.strip()
            if not trimmed or piece.isdigit():
                continue
            if trimmed.lower().startswith("references:"):
                continue
            reference_entries.append(trimmed)

    formatted_refs = ""
    if reference_entries:
        numbered = []
        for position, entry in enumerate(reference_entries, 1):
            numbered.append(f"[{position}] {remove_bracketed_numbers(entry)}{sep}")
        formatted_refs = f"{sep}{sep}References:{sep}{sep}" + f"{sep}".join(numbered)

    body = f"{sep}{sep}".join(body_lines)

    return body, formatted_refs


def ends_with_references(text):
    """Return True when ``text`` ends with a "References:" heading.

    The check is case-insensitive and ignores surrounding whitespace. The
    pattern is deliberately not compiled with ``re.MULTILINE``: with that flag
    ``$`` matches at the end of every line, so a "References:" heading that is
    followed by further lines would wrongly count as the end of the text.
    """
    pattern = re.compile(r"\breferences:\s*$", re.IGNORECASE)
    return bool(pattern.search(text.strip()))


def format_and_correct_language_check(text: str) -> str:
    """Return ``text`` after applying the module-level LanguageTool's ``correct``."""
    corrected_text = tool.correct(text)
    return corrected_text


def predict(model, tokenizer, text):
    """Score ``text`` with a binary classifier and return HUMAN/AI probabilities.

    The text is passed through ``remove_special_characters``, tokenized with
    padding/truncation to 256 tokens, and run through ``model`` without
    gradient tracking. The logits are soft-maxed; index 0 is reported as
    ``"HUMAN"`` and index 1 as ``"AI"``.

    Returns:
        ``{"HUMAN": p0, "AI": p1}`` for the single input text.
    """
    max_tokens = 256
    cleaned = remove_special_characters(text)
    with torch.no_grad():
        model.eval()
        encoded = tokenizer(
            cleaned,
            padding="max_length",
            truncation=True,
            max_length=max_tokens,
            return_tensors="pt",
        ).to(device)
        logits = model(**encoded).logits
        probabilities = softmax(logits.detach().cpu().numpy(), 1)[0]
        result = {"HUMAN": probabilities[0], "AI": probabilities[1]}
        # Free cached GPU memory and collect garbage after every call.
        torch.cuda.empty_cache()
        gc.collect()
    return result


def ai_generated_test(text, model="Polygraf AI (Base Model)"):
    """Run the named binary detector on ``text``.

    Args:
        text: Text to classify.
        model: Key into the module-level ``models`` / ``tokenizers`` dicts.
            The default is the base model; the previous default
            ("BC Original") was not a key of either dict and always raised
            ``KeyError``.

    Returns:
        The ``{"HUMAN": ..., "AI": ...}`` dict produced by ``predict``.
    """
    return predict(models[model], tokenizers[model], text)


def detection_polygraf(text, model="Polygraf AI (Base Model)"):
    """Score ``text`` sentence by sentence and return an overall score plus highlighted HTML.

    Sentences are scored in sliding windows of up to three; each sentence's
    score is the mean "AI" probability of every window that contains it.
    Sentences averaging >= 0.70 are highlighted red and >= 0.55 GoldenRod.

    Args:
        text: Text to analyse.
        model: Detector name passed to ``ai_generated_test``. The previous
            default ("BC Original") matched no loaded model.

    Returns:
        ``(overall_score, html)``. ``overall_score`` is
        ``{"HUMAN": 1 - s, "AI": s}`` with ``s`` the mean of the per-sentence
        averages. ``html`` is the paragraphs joined by ``<br><br>``. When
        nothing could be scored (e.g. empty text) ``s`` is 0.0 instead of
        raising ``ZeroDivisionError``.
    """
    sentences = nltk.sent_tokenize(text)
    num_sentences = len(sentences)
    scores = defaultdict(list)

    # Score each window of up to 3 consecutive sentences and credit that score
    # to every sentence in the window.
    for start in range(num_sentences):
        chunk = " ".join(sentences[start : start + 3])
        if chunk:
            score = ai_generated_test(chunk, model)["AI"]
            for covered in range(start, min(start + 3, num_sentences)):
                scores[covered].append(score)

    # Re-tokenize per paragraph to rebuild the layout. This assumes the
    # per-paragraph sentences line up with the whole-text tokenization above;
    # a sentence without scores is silently left out of the output.
    overall_scores = []
    colored_paragraphs = []
    sentence_index = 0
    for paragraph in (p for p in text.split("\n") if p.strip()):
        colored_sentences = []
        for sentence in nltk.sent_tokenize(paragraph):
            sentence_scores = scores.get(sentence_index)
            if sentence_scores:
                avg_score = sum(sentence_scores) / len(sentence_scores)
                if avg_score >= 0.70:
                    colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
                elif avg_score >= 0.55:
                    colored_sentence = f"<span style='background-color:GoldenRod;'>{sentence}</span>"
                else:
                    colored_sentence = sentence
                colored_sentences.append(colored_sentence)
                overall_scores.append(avg_score)
            sentence_index += 1
        colored_paragraphs.append(" ".join(colored_sentences))

    # Guard against empty input: no scored sentence means no evidence of AI text.
    if overall_scores:
        overall_score = sum(overall_scores) / len(overall_scores)
    else:
        overall_score = 0.0
    overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
    return overall_score, "<br><br>".join(colored_paragraphs)


# Detector names offered for AI checking; each entry matches a key of the
# `models` / `tokenizers` dicts loaded at the top of this file.
ai_check_options = [
    "Polygraf AI (Base Model)",
    "Polygraf AI (Advanced Model)",
]


def predict_mc(text):
    """Return the source-detection model's soft-maxed scores for ``text``.

    The text is tokenized with padding/truncation to ``MC_TOKEN_SIZE`` tokens
    and run through ``text_mc_model`` without gradient tracking.

    Returns:
        The first row of the soft-maxed logits as a NumPy array.
    """
    with torch.no_grad():
        text_mc_model.eval()
        encoded = text_mc_tokenizer(
            text,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
            max_length=MC_TOKEN_SIZE,
        ).to(device)
        logits = text_mc_model(**encoded).logits
        probabilities = softmax(logits.detach().cpu().numpy(), 1)[0]
        # Free cached GPU memory and collect garbage after every call.
        torch.cuda.empty_cache()
        gc.collect()
    return probabilities


def predict_mc_scores(input, bc_score):
    """Estimate which source produced the text, scaled by the AI probability.

    ``input`` is split into segments with
    ``split_text_allow_complete_sentences_nltk``; each segment is cleaned and
    scored with ``predict_mc``. The per-segment scores are averaged, paired
    positionally with ``MC_LABEL_MAP`` and multiplied by
    ``1 - bc_score["HUMAN"]``.

    Args:
        input: Text to analyse (the name shadows the builtin but is kept for
            callers that pass it by keyword).
        bc_score: Binary detector result containing a ``"HUMAN"`` entry.

    Returns:
        Dict mapping upper-cased label to scaled score. The dict is empty when
        the AI probability is below 0.01 or when the text yields no segments.
    """
    # Split once; the original split the text a second time just to get a length.
    segments = split_text_allow_complete_sentences_nltk(input, type_det="mc", tokenizer=text_mc_tokenizer)
    if len(segments) == 0:
        # Nothing to score: averaging an empty array would yield NaN and the
        # label pairing below would then fail.
        return {}

    segment_scores = [predict_mc(remove_special_characters(segment)) for segment in segments]
    average_scores = np.mean(np.array(segment_scores), axis=0).tolist()

    ai_probability = 1 - bc_score["HUMAN"]
    mc_score = {label.upper(): score * ai_probability for score, label in zip(average_scores, MC_LABEL_MAP)}
    print("MC Score:", mc_score)
    if ai_probability < 0.01:
        mc_score = {}

    return mc_score


def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
    """Run AI detection and source detection on an article.

    Citation markers are removed, the references section is split off, and
    only the body is scored. The references are appended to the highlighted
    HTML afterwards with newlines turned into ``<br>``.

    Returns:
        ``(score, html, mc_score)``: the binary score dict, the highlighted
        HTML including references, and the source-detection scores.
    """
    without_citations = remove_citations(text)
    body, references = split_text_from_refs(without_citations)
    score, highlighted = detection_polygraf(text=body, model=model)
    mc_score = predict_mc_scores(body, score)  # mc score
    references_html = references.replace("\n", "<br>")
    return score, highlighted + references_html, mc_score


def ai_check(history: list, option: str):
    """Run the selected detector on the most recent article in ``history``.

    Args:
        history: List of entries whose index 1 holds the article text; the
            last entry is checked.
        option: Detector name forwarded to ``highlighter_polygraf``.

    Returns:
        Whatever ``highlighter_polygraf`` returns for that text.
    """
    # Both branches of the former `if option.startswith("Polygraf AI")` made
    # the same call, so the conditional is dropped.
    latest_text = history[-1][1]
    return highlighter_polygraf(latest_text, option)


def generate_prompt(settings: Dict[str, str]) -> str:
    """Assemble the article-generation prompt from the user's settings.

    Blank keywords are filtered out and the filtered list is written back to
    ``settings["keywords"]``. The "Context" section is included only when
    ``settings["context"]`` is truthy, and the "Keywords" section only when at
    least one keyword remains.

    Returns:
        The prompt text.
    """
    usable_keywords = [keyword for keyword in settings["keywords"] if keyword.strip()]
    settings["keywords"] = usable_keywords
    #    - Add a "References" section in the format "References:" on a new line after the requested text, formatted as [1], [2], etc. with each source on their own line
    sections = [
        f"""
Write a {settings['article_length']} words (around) {settings['format']} on {settings['topic']}.\n
    """
    ]
    if settings["context"]:
        sections.append(
            f"""
    Context:
    - {settings['context']}
        """
        )
    sections.append(
        f"""
    Style and Tone:
    - Writing style: {settings['writing_style']}
    - Tone: {settings['tone']}
    - Target audience: {settings['user_category']}

    Content:
    - Depth: {settings['depth_of_content']}
    - Structure: {', '.join(settings['structure'])}
    """
    )
    if settings["keywords"]:
        sections.append(
            f"""
    Keywords to incorporate:
    {', '.join(settings['keywords'])}
        """
        )
    sections.append(
        f"""
    Additional requirements:
    - Don't start with "Here is a...", start with the requested text directly
    - End with a {settings['conclusion_type']} conclusion
    - Do not make any headline, title bold.
    - Ensure proper paragraph breaks for better readability.
    - Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
    - Adhere to any format structure provided to the system if any.
    """
    )
    return "".join(sections)


def regenerate_prompt(settings: Dict[str, str]) -> str:
    """Build the prompt that asks for an edit of an already generated article.

    Reads ``role``, ``generated_article``, ``user_comments`` and ``context``
    from ``settings`` and embeds them in a fixed instruction template.

    Returns:
        The prompt text.
    """
    return f"""
    I am a {settings['role']}
    "{settings['generated_article']}"
    Edit the given text based on user comments.
    User Comments:
    - {settings['user_comments']}

    Requirements:
    - Don't start with "Here is a...", start with the requested text directly
    - The original content should not be changed. Make minor modifications based on user comments above.
    - Keep the references the same as the given text in the same format.
    - Do not make any headline, title bold.
    Context:
    - {settings['context']}

    Ensure proper paragraph breaks for better readability.
    Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
    """


def generate_article(
    input_role: str,
    topic: str,
    context: str,
    keywords: str,
    article_length: str,
    format: str,
    writing_style: str,
    tone: str,
    user_category: str,
    depth_of_content: str,
    structure: str,
    references: str,
    num_examples: str,
    conclusion_type: str,
    ai_model: str,
    url_content: str = None,
    api_key: str = None,
    pdf_file_input: list[str] = None,
    generated_article: str = None,
    user_comments: str = None,
    yt_content: str = None,
) -> tuple:
    """Build the prompt from the UI settings and ask the selected model for an article.

    When ``generated_article`` is given, the edit prompt from
    ``regenerate_prompt`` is used; otherwise a fresh prompt is built by
    ``generate_prompt``. ``keywords``, ``structure`` and ``references`` are
    comma-separated strings that are split and stripped before use.

    Returns:
        The ``(article, citations)`` pair returned by ``generate``. The
        original annotation declared ``str``, which did not match this pair.
    """
    settings = {
        "role": input_role,
        "topic": topic,
        "context": context,
        "keywords": [k.strip() for k in keywords.split(",")],
        "article_length": article_length,
        "format": format,
        "writing_style": writing_style,
        "tone": tone,
        "user_category": user_category,
        "depth_of_content": depth_of_content,
        "structure": [s.strip() for s in structure.split(",")],
        "references": [r.strip() for r in references.split(",")],
        "num_examples": num_examples,
        "conclusion_type": conclusion_type,
        "generated_article": generated_article,
        "user_comments": user_comments,
    }

    # An existing article means the user asked for an edit, not a new draft.
    if generated_article:
        prompt = regenerate_prompt(settings)
    else:
        prompt = generate_prompt(settings)

    print("Generated Prompt...\n", prompt)
    article, citations = generate(
        prompt=prompt,
        input_role=input_role,
        topic=topic,
        context=context,
        model=ai_model,
        url_content=url_content,
        path=pdf_file_input,
        # path=["./final_report.pdf"], # TODO: reset
        temperature=1,
        max_length=2048,
        api_key=api_key,
        sys_message="",
        yt_content=yt_content,
    )
    return article, citations


def get_history(history):
    """Return ``history`` reduced to ``(entry[0], entry[1])`` pairs.

    Any further fields an entry may carry (index 2 onwards) are dropped.
    """
    # return history
    return [(entry[0], entry[1]) for entry in history]


def clear_history():
    """Return two fresh empty lists: one for the history state, one for its display."""
    empty_state = []
    empty_display = []
    return empty_state, empty_display


def humanize(
    model: str,
    cited_text: str,
    temperature: float = 1.2,
    repetition_penalty: float = 1,
    top_k: int = 50,
    length_penalty: float = 1,
    history=None,
) -> tuple:
    """Humanize the most recent article in ``history`` and record the result.

    The text and citations are taken from the last ``history`` entry (indexes
    1 and 2); the ``cited_text`` argument is overwritten by that text. The
    humanized output is cleaned with ``clean_text`` and appended to
    ``history`` as a new entry.

    Returns:
        ``(html, history, latest_humanizer_data)``: the rendered HTML from
        ``generate_cited_html``, the updated history, and a dict with the
        original text, humanized text, citations, sampling parameters and
        timestamp.

    NOTE(review): ``history`` defaults to None but is indexed unconditionally,
    so callers must always pass a non-empty history.
    """
    print("Humanizing text...")
    # body, references = split_text_from_refs(text)
    cited_text = history[-1][1]
    citations = history[-1][2]
    article = humanize_text(
        text=cited_text,
        model_name=model,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    # result = result + references
    # corrected_text = format_and_correct_language_check(result)
    article = clean_text(article)
    # Read the clock once so the history label and the saved record always
    # carry the same timestamp.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history.append((f"Humanized Text | {timestamp}\nInput: {model}", article, citations))
    latest_humanizer_data = {
        "original text": cited_text,
        "humanized text": article,
        "citations": citations,  # can remove saving citations
        "metadata": {
            "temperature": temperature,
            "repetition_penalty": repetition_penalty,
            "top_k": top_k,
            "length_penalty": length_penalty,
        },
        "timestamp": timestamp,
    }
    return generate_cited_html(article, citations), history, latest_humanizer_data


def update_visibility_api(model: str):
    """Return a Gradio update that shows the component only for the listed OpenAI models."""
    needs_api_key = model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]
    return gr.update(visible=needs_api_key)


def update_structure(format_choice):
    """Return a Gradio update setting the default structure for the chosen format.

    The short-form formats listed below default to "Plain Text"; every other
    format defaults to "Introduction, Body, Conclusion". The component stays
    interactive in both cases.
    """
    # Formats that should use "Plain Text"
    plain_text_formats = [
        "TikTok Video Content",
        "Instagram Video Content",
        "LinkedIn post",
        "X (Twitter) post",
        "Facebook post",
        "Email",
    ]
    default_structure = "Plain Text" if format_choice in plain_text_formats else "Introduction, Body, Conclusion"
    return gr.update(value=default_structure, interactive=True)


# Initialize Google Cloud Storage client
# Runs at import time. NOTE(review): presumably authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS variable mentioned in the module docstring —
# confirm for the deployment environment.
client = storage.Client()
bucket_name = "ai-source-detection"
# Bucket handle shared by the save_* functions below.
bucket = client.bucket(bucket_name)


def save_to_cloud_storage(
    article,
    topic,
    input_role,
    context,
    keywords,
    article_length,
    format,
    writing_style,
    tone,
    user_category,
    depth_of_content,
    structure,
    references,
    num_examples,
    conclusion_type,
    ai_model,
    url_content,
    generated_article,
    user_comments,
    timestamp,
):
    """Upload the article and the settings that produced it as one JSON object.

    The object is stored under ``ai-writer/`` in the module-level bucket. Its
    name combines the timestamp (spaces become ``_``, colons become ``-``)
    with a random UUID, so repeated saves never collide.

    Returns:
        A confirmation message containing the object name.
    """
    metadata = {
        "topic": topic,
        "input_role": input_role,
        "context": context,
        "keywords": keywords,
        "article_length": article_length,
        "format": format,
        "writing_style": writing_style,
        "tone": tone,
        "user_category": user_category,
        "depth_of_content": depth_of_content,
        "structure": structure,
        "references": references,
        "num_examples": num_examples,
        "conclusion_type": conclusion_type,
        "ai_model": ai_model,
        "url_content": url_content,
        "generated_article": generated_article,
        "user_comments": user_comments,
        "timestamp": timestamp,
    }
    payload = json.dumps({"article": article, "metadata": metadata})

    # Unique object name: <folder><sanitised timestamp>_<uuid>.json
    unique_suffix = str(uuid.uuid4())
    safe_timestamp = timestamp.replace(" ", "_").replace(":", "-")
    file_name = "ai-writer/" + f"{safe_timestamp}_{unique_suffix}.json"

    bucket.blob(file_name).upload_from_string(payload, content_type="application/json")

    return f"Data saved as {file_name} in GCS."


def save_humanizer_feedback_to_cloud_storage(data, humanizer_feedback):
    """Attach user feedback to the latest humanizer record and upload it as JSON.

    ``data`` is updated in place with a ``"user_feedback"`` entry and stored
    under ``ai-writer/humanizer-feedback/`` in the module-level bucket. The
    outcome is reported through Gradio notifications: a warning when there is
    nothing to save or when any step fails, an info message on success.
    """
    if not data:
        gr.Warning("Nothing humanized to save yet!")
        return

    try:
        data["user_feedback"] = humanizer_feedback

        # Unique object name: <folder><sanitised timestamp>_<uuid>.json
        unique_suffix = str(uuid.uuid4())
        safe_timestamp = data["timestamp"].replace(" ", "_").replace(":", "-")
        file_name = "ai-writer/humanizer-feedback/" + f"{safe_timestamp}_{unique_suffix}.json"

        payload = json.dumps(data)
        bucket.blob(file_name).upload_from_string(payload, content_type="application/json")
        gr.Info("Successfully reported. Thank you for the feedback!")
    except Exception:
        # Best effort: a failed report must not break the UI flow.
        gr.Warning("Report not saved.")


# Domains of scholarly sources. In the visible part of this file the list is
# only referenced from commented-out query-building code in generate_and_format.
# NOTE(review): the trailing "# 400" markers presumably record sites that
# answered with HTTP 400 when queried — confirm before re-enabling them.
scholar_urls = [
    "arxiv.org",
    "aclanthology.org",
    "ieeexplore.ieee.org",
    "researchgate.net",
    # "scholar.google.com",
    "springer.com",
    # "sciencedirect.com", # 400
    # "onlinelibrary.wiley.com", # 400
    "jstor.org",  # 400
    "semanticscholar.org",
    "biorxiv.org",
    "medrxiv.org",
    "ssrn.com",
    "pubmed.ncbi.nlm.nih.gov",
    "cochranelibrary.com",
]


def generate_and_format(
    input_role,
    topic,
    context,
    keywords,
    article_length,
    format,
    writing_style,
    tone,
    user_category,
    depth_of_content,
    structure,
    references,
    num_examples,
    conclusion_type,
    google_search_check,
    scholar_mode_check,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_include,
    include_sites,
    exclude_sites,
    pdf_file_input,
    history=None,
    yt_url: str = None,
    ai_model="OpenAI GPT 4o",
    api_key=None,
    generated_article: str = None,
    user_comments: str = None,
):
    """Generate an article from the UI inputs, record it in the history and return its HTML.

    Before generation, extra source material is optionally collected: an internet
    search (when ``google_search_check`` is set) and a YouTube transcript (when
    ``yt_url`` is given). Unless PDFs were supplied, the result and its settings
    are also saved to Cloud Storage on a best-effort basis.

    Args:
        input_role, topic, context, keywords, article_length, format,
            writing_style, tone, user_category, depth_of_content, structure,
            references, num_examples, conclusion_type: Article settings,
            forwarded unchanged to ``generate_article``.
        google_search_check: Whether to run an internet search for sources.
        scholar_mode_check: Passed to ``google_search``; when set, the
            include/exclude site filters are not added to the query.
        year_from, month_from, day_from, year_to, month_to, day_to: Date range
            for the search, combined via ``build_date``.
        domains_to_include: Passed to ``google_search``.
        include_sites, exclude_sites: Comma-separated site names turned into
            ``site:`` / ``-site:`` query operators; empty entries are ignored.
        pdf_file_input: Uploaded PDFs forwarded to ``generate_article``. When it
            is not ``None`` the result is not saved, for privacy reasons.
        history: List that receives a ``(label, article, citations)`` entry; a
            new list is created when omitted.
        yt_url: Optional YouTube link to transcribe and use as source material.
        ai_model, api_key: Forwarded to ``generate_article``.
        generated_article, user_comments: Previously generated text and the
            user's edit comments, forwarded to ``generate_article`` (used by the
            regenerate flow).

    Returns:
        A tuple ``(html, history)`` where ``html`` is the output of
        ``generate_cited_html`` for the new article and ``history`` is the
        updated history list.
    """
    # `history` defaults to None for direct calls; without this the append below
    # would raise AttributeError.
    if history is None:
        history = []

    url_content = None
    if google_search_check:
        gr.Info("Searching internet for relevant content...")
        date_from = build_date(year_from, month_from, day_from)
        date_to = build_date(year_to, month_to, day_to)
        sorted_date = f"date:r:{date_from}:{date_to}"
        final_query = llm_wrapper(
            input_role, topic, context, model="OpenAI GPT 4o", task_type="internet", temperature=0.7
        )
        # In scholar mode the query is left as generated; the flag itself is
        # handed to google_search below.
        if not scholar_mode_check:
            if include_sites:
                # Skip blank entries (e.g. a trailing comma) so no bare "site:" is emitted.
                site_queries = [f"site:{site.strip()}" for site in include_sites.split(",") if site.strip()]
                if site_queries:
                    final_query += " " + " OR ".join(site_queries)
            if exclude_sites:
                exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",") if site.strip()]
                if exclude_queries:
                    final_query += " " + " ".join(exclude_queries)
        print(f"Google Search Query: {final_query}")
        url_content = google_search(final_query, sorted_date, domains_to_include, scholar_mode_check)

    yt_content = {}
    if yt_url:
        gr.Info("Transcribing YouTube video...")
        transcribed_text = transcribe(yt_url)
        gr.Info("Transcription completed. Generating article...")
        yt_content[yt_url] = transcribed_text

    article, citations = generate_article(
        input_role,
        topic,
        context,
        keywords,
        article_length,
        format,
        writing_style,
        tone,
        user_category,
        depth_of_content,
        structure,
        references,
        num_examples,
        conclusion_type,
        ai_model,
        url_content,
        api_key,
        pdf_file_input,
        generated_article,
        user_comments,
        yt_content,
    )

    article = clean_text(display_cited_text(article))
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history.append((f"Generated Text | {timestamp}\nInput: {topic}", article, citations))

    # Save the article and metadata to Cloud Storage.
    # We don't save if there is PDF input for privacy reasons.
    if pdf_file_input is None:
        try:
            save_message = save_to_cloud_storage(
                article,
                topic,
                input_role,
                context,
                keywords,
                article_length,
                format,
                writing_style,
                tone,
                user_category,
                depth_of_content,
                structure,
                references,
                num_examples,
                conclusion_type,
                ai_model,
                url_content,
                generated_article,
                user_comments,
                timestamp,
            )
            print(save_message)
        except Exception as exc:
            # Persisting is bookkeeping only: a storage failure must not throw
            # away the article that was just generated for the user.
            print(f"Failed to save article to Cloud Storage: {exc!r}")
    return generate_cited_html(article, citations), history


# def create_interface():
# Build the Gradio UI. Components are created in layout order inside the nested
# context managers, so statement order below defines the rendered page.
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.yellow, neutral_hue=gr.themes.colors.gray
    ),
    css="""
        .input-highlight-pink block_label {background-color: #008080}
        """,
) as demo:
    # Session state: list of generation entries that generate_and_format appends to.
    history = gr.State([])
    # Session state: written by the humanize handler's third output and read by
    # the "Report Humanized Text" handler.
    latest_humanizer_data = gr.State()
    today = date.today()
    # Formatted as day/full month name/4-digit year (e.g. "05/March/2024"), then
    # split into [day, month_name, year] to pre-fill the "To" date fields.
    d1 = today.strftime("%d/%B/%Y")
    d1 = d1.split("/")
    gr.Markdown("# Polygraf AI Content Writer", elem_classes="text-center text-3xl mb-6")

    # Two-column page: inputs on the left (scale=1), outputs and tools on the right (scale=2).
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                # --- Article settings; their values are passed to generate_and_format ---
                gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
                input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
                input_topic = gr.Textbox(
                    label="Topic",
                    placeholder="Enter the main topic of your article",
                    elem_classes="input-highlight-pink",
                )
                input_context = gr.Textbox(
                    label="Context",
                    placeholder="Provide some context for your topic",
                    elem_classes="input-highlight-pink",
                )
                input_keywords = gr.Textbox(
                    label="Keywords",
                    placeholder="Enter comma-separated keywords",
                    elem_classes="input-highlight-yellow",
                )

                with gr.Row():
                    # Changing the format triggers update_structure (wired below the layout).
                    input_format = gr.Dropdown(
                        choices=[
                            "Article",
                            "Essay",
                            "Blog post",
                            "Report",
                            "Research paper",
                            "News article",
                            "White paper",
                            "Email",
                            "LinkedIn post",
                            "X (Twitter) post",
                            "Instagram Video Content",
                            "TikTok Video Content",
                            "Facebook post",
                        ],
                        value="Article",
                        label="Format",
                        elem_classes="input-highlight-turquoise",
                    )

                input_length = gr.Slider(
                    minimum=50,
                    maximum=5000,
                    step=50,
                    value=300,
                    label="Article Length",
                    elem_classes="input-highlight-pink",
                )

                with gr.Row():
                    input_writing_style = gr.Dropdown(
                        choices=[
                            "Formal",
                            "Informal",
                            "Technical",
                            "Conversational",
                            "Journalistic",
                            "Academic",
                            "Creative",
                        ],
                        value="Formal",
                        label="Writing Style",
                        elem_classes="input-highlight-yellow",
                    )
                    input_tone = gr.Dropdown(
                        choices=["Friendly", "Professional", "Neutral", "Enthusiastic", "Skeptical", "Humorous"],
                        value="Professional",
                        label="Tone",
                        elem_classes="input-highlight-turquoise",
                    )

                input_user_category = gr.Dropdown(
                    choices=[
                        "Students",
                        "Professionals",
                        "Researchers",
                        "General Public",
                        "Policymakers",
                        "Entrepreneurs",
                    ],
                    value="General Public",
                    label="Target Audience",
                    elem_classes="input-highlight-pink",
                )
                input_depth = gr.Dropdown(
                    choices=[
                        "Surface-level overview",
                        "Moderate analysis",
                        "In-depth research",
                        "Comprehensive study",
                    ],
                    value="Moderate analysis",
                    label="Depth of Content",
                    elem_classes="input-highlight-yellow",
                )
                # Output of the input_format.change handler, hence interactive=True.
                input_structure = gr.Dropdown(
                    choices=[
                        "Introduction, Body, Conclusion",
                        "Abstract, Introduction, Methods, Results, Discussion, Conclusion",
                        "Executive Summary, Problem Statement, Analysis, Recommendations, Conclusion",
                        "Introduction, Literature Review, Methodology, Findings, Analysis, Conclusion",
                        "Plain Text",
                    ],
                    value="Introduction, Body, Conclusion",
                    label="Structure",
                    elem_classes="input-highlight-turquoise",
                    interactive=True,
                )
                input_references = gr.Dropdown(
                    choices=[
                        "Academic journals",
                        "Industry reports",
                        "Government publications",
                        "News outlets",
                        "Expert interviews",
                        "Case studies",
                    ],
                    value="News outlets",
                    label="References",
                    elem_classes="input-highlight-pink",
                )
                input_num_examples = gr.Dropdown(
                    choices=["1-2", "3-4", "5+"],
                    value="1-2",
                    label="Number of Examples/Case Studies",
                    elem_classes="input-highlight-yellow",
                )
                input_conclusion = gr.Dropdown(
                    choices=["Summary", "Call to Action", "Future Outlook", "Thought-provoking Question"],
                    value="Call to Action",
                    label="Conclusion Type",
                    elem_classes="input-highlight-turquoise",
                )
                # --- Internet search settings ---
                gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
                # Single switch for both the checkbox default and the initial
                # visibility of the search options group.
                google_default = False
                with gr.Row():
                    google_search_check = gr.Checkbox(
                        label="Enable Internet Search For Recent Sources", value=google_default
                    )
                # Shown/hidden by the google_search_check.change handler wired below the layout.
                with gr.Group(visible=google_default) as search_options:
                    with gr.Row():
                        scholar_mode_check = gr.Checkbox(label="Enable Scholar Mode", value=False)
                    with gr.Group(visible=True) as site_options:
                        with gr.Row():
                            include_sites = gr.Textbox(
                                label="Include Specific Websites",
                                placeholder="Enter comma-separated keywords",
                                elem_classes="input-highlight-yellow",
                            )
                        with gr.Row():
                            exclude_sites = gr.Textbox(
                                label="Exclude Specific Websites",
                                placeholder="Enter comma-separated keywords",
                                elem_classes="input-highlight-yellow",
                            )
                        with gr.Row():
                            # All domains from domain_list are selected by default.
                            domains_to_include = gr.Dropdown(
                                domain_list,
                                value=domain_list,
                                multiselect=True,
                                label="Domains To Include",
                            )
                    # Search date range: "from" defaults to January 01, 2000.
                    with gr.Row():
                        month_from = gr.Dropdown(
                            choices=months,
                            label="From Month",
                            value="January",
                            interactive=True,
                        )
                        day_from = gr.Textbox(label="From Day", value="01")
                        year_from = gr.Textbox(label="From Year", value="2000")

                    # "To" defaults to today's date; d1 is [day, month_name, year].
                    with gr.Row():
                        month_to = gr.Dropdown(
                            choices=months,
                            label="To Month",
                            value=d1[1],
                            interactive=True,
                        )
                        day_to = gr.Textbox(label="To Day", value=d1[0])
                        year_to = gr.Textbox(label="To Year", value=d1[2])

                # --- Optional extra source material ---
                gr.Markdown("# Add Optional PDF Files with Information", elem_classes="text-center text-3xl mb-6")
                pdf_file_input = gr.File(label="Upload PDF(s)", file_count="multiple", file_types=[".pdf"])
                gr.Markdown("# Add Youtube Video Link", elem_classes="text-center text-3xl mb-6")
                yt_url = gr.Textbox(
                    label="Youtube Video Link",
                    placeholder="Enter the link of the video",
                    elem_classes="input-highlight-pink",
                )
            # The string literal below is a disabled block of UI code (AI model
            # selection); it is evaluated as a no-op expression, not executed.
            """
            # NOTE: HIDE AI MODEL SELECTION
            with gr.Group():
                gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
                ai_generator = gr.Dropdown(
                    choices=[
                        "OpenAI GPT 4",
                        "OpenAI GPT 4o",
                        "OpenAI GPT 4o Mini",
                        "Claude Sonnet 3.5",
                        "Gemini 1.5 Pro",
                        "LLaMA 3",
                    ],
                    value="OpenAI GPT 4o Mini",
                    label="AI Model",
                    elem_classes="input-highlight-pink",
                )
            input_api = gr.Textbox(label="API Key", visible=False)
            ai_generator.change(update_visibility_api, ai_generator, input_api)
            """
            generate_btn = gr.Button("Generate Article", variant="primary")

        # Right column: generated article, AI detection, humanizer and history.
        with gr.Column(scale=2):
            with gr.Tab("Text Generator"):
                # Empty fixed-height placeholder until an article is generated.
                output_article = gr.HTML(
                    value="""<div style="height: 600px;"></div>""",
                    label="Generated Article",
                )
                with gr.Accordion("Regenerate Article", open=False):
                    ai_comments = gr.Textbox(
                        label="Add comments to help edit generated text", interactive=True, visible=True
                    )
                    regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=True)

                # --- AI detection ---
                ai_detector_dropdown = gr.Dropdown(
                    choices=ai_check_options, label="Select AI Detector", value="Polygraf AI (Base Model)"
                )
                ai_check_btn = gr.Button("AI Check")

                with gr.Accordion("AI Detection Results", open=True):
                    ai_check_result = gr.Label(label="AI Check Result")
                    mc_check_result = gr.Label(label="Creator Check Result")
                    # Hidden initially; the AI Check click handler decides its visibility.
                    highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)

                # --- Humanizer settings; their values are passed to the humanize handler ---
                with gr.Accordion("Advanced Humanizer Settings", open=False):
                    with gr.Row():
                        model_dropdown = gr.Radio(
                            choices=["Advanced Model (Beta)"],
                            value="Advanced Model (Beta)",
                            label="Humanizer Model Version",
                        )
                    with gr.Row():
                        temperature_slider = gr.Slider(
                            minimum=0.1, maximum=2.0, step=0.1, value=1.0, label="Temperature"
                        )
                        top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=40, label="Top k")
                    with gr.Row():
                        repetition_penalty_slider = gr.Slider(
                            minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
                        )
                        length_penalty_slider = gr.Slider(
                            minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
                        )

                humanize_btn = gr.Button("Humanize")
                # Feedback text and the button that uploads it with the latest humanizer data.
                with gr.Row(equal_height=False):
                    with gr.Column():
                        humanizer_feedback = gr.Textbox(label="Add optional feedback on humanizer")
                    with gr.Column():
                        report_humanized_btn = gr.Button("Report Humanized Text", variant="primary", visible=True)
                # humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
                # copy_to_input_btn = gr.Button("Copy to Input for AI Check")

            with gr.Tab("History"):
                history_chat = gr.Chatbot(label="Generation History", height=1000)
                clear_history_btn = gr.Button("Clear History")
                # clear_history takes no inputs and provides new values for both
                # the history state and the chat display.
                clear_history_btn.click(clear_history, outputs=[history, history_chat])
                # The string literal below is disabled UI code (refresh button), kept as a no-op expression.
                """
                # NOTE: REMOVED REFRESH BUTTON
                refresh_button = gr.Button("Refresh History")
                refresh_button.click(get_history, outputs=history_chat)
                """

    def regenerate_visible(text):
        """Return a Gradio update that shows the target only when `text` is non-empty."""
        should_show = bool(text)
        return gr.update(visible=should_show)

    def highlight_visible(text):
        """Return a Gradio update that shows the target only for detectors named "Polygraf...".

        `text` is the selected detector name.
        """
        is_polygraf_detector = text.startswith("Polygraf")
        return gr.update(visible=is_polygraf_detector)

    def search_visible(toggle):
        """Return a Gradio update whose visibility follows the truthiness of `toggle`."""
        return gr.update(visible=True) if toggle else gr.update(visible=False)

    # Show the search options group only while internet search is enabled.
    google_search_check.change(fn=search_visible, inputs=google_search_check, outputs=search_options)
    # Disabled wiring kept for reference:
    # ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
    # output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
    # ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)

    # On "AI Check", reveal the sentence breakdown only for Polygraf detectors.
    ai_check_btn.click(fn=highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)

    # Picking a format updates the structure dropdown via update_structure.
    input_format.change(fn=update_structure, inputs=input_format, outputs=input_structure)

    # Upload the latest humanizer data together with the typed feedback.
    report_humanized_btn.click(
        fn=save_humanizer_feedback_to_cloud_storage,
        inputs=[latest_humanizer_data, humanizer_feedback],
    )

    # Gradio passes component values to the handler positionally, so this list
    # follows generate_and_format's leading parameters one-to-one (input_role
    # through yt_url). The hidden ai_generator / input_api components are not
    # passed; the function's defaults apply for them.
    generation_inputs = [
        input_role,
        input_topic,
        input_context,
        input_keywords,
        input_length,
        input_format,
        input_writing_style,
        input_tone,
        input_user_category,
        input_depth,
        input_structure,
        input_references,
        input_num_examples,
        input_conclusion,
        google_search_check,
        scholar_mode_check,
        year_from,
        month_from,
        day_from,
        year_to,
        month_to,
        day_to,
        domains_to_include,
        include_sites,
        exclude_sites,
        pdf_file_input,
        history,
        yt_url,
    ]
    generation_outputs = [output_article, history]
    generate_btn.click(fn=generate_and_format, inputs=generation_inputs, outputs=generation_outputs)

    def regenerate_and_format(*component_values):
        """Adapt the regenerate inputs to generate_and_format's signature.

        Gradio hands component values over positionally. The last two values are
        the currently displayed article and the user's edit comments; they map to
        the keyword-only-by-position parameters `generated_article` and
        `user_comments`, which sit after `yt_url`, `ai_model` and `api_key` in the
        signature and therefore cannot be reached positionally from the UI.
        Everything before them lines up with the leading parameters
        (`input_role` through `history`).
        """
        *leading_values, current_article, comments = component_values
        return generate_and_format(
            *leading_values,
            generated_article=current_article,
            user_comments=comments,
        )

    regenerate_btn.click(
        fn=regenerate_and_format,
        # Order matters: the first 27 entries must match generate_and_format's
        # parameters up to `history`; the final two are consumed by the adapter.
        inputs=[
            input_role,
            input_topic,
            input_context,
            input_keywords,
            input_length,
            input_format,
            input_writing_style,
            input_tone,
            input_user_category,
            input_depth,
            input_structure,
            input_references,
            input_num_examples,
            input_conclusion,
            # ai_generator,
            # input_api,
            google_search_check,
            scholar_mode_check,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_include,
            include_sites,
            exclude_sites,
            pdf_file_input,
            history,
            output_article,
            ai_comments,
        ],
        outputs=[output_article, history],
    )

    # Run the selected AI detector; results go to the two labels and the
    # sentence-breakdown HTML.
    ai_check_btn.click(
        fn=ai_check,
        inputs=[history, ai_detector_dropdown],
        outputs=[ai_check_result, highlighted_text, mc_check_result],
    )

    # Humanize the displayed article with the chosen sampling settings. The
    # handler also updates the history and stores its data for the feedback report.
    humanizer_inputs = [
        model_dropdown,
        output_article,
        temperature_slider,
        repetition_penalty_slider,
        top_k_slider,
        length_penalty_slider,
        history,
    ]
    humanize_btn.click(
        fn=humanize,
        inputs=humanizer_inputs,
        outputs=[output_article, history, latest_humanizer_data],
    )

    # Refresh the history tab whenever one of the content-producing buttons is clicked.
    # NOTE(review): these are separate listeners on the same buttons as the
    # handlers that modify `history`; confirm the chat reflects the updated
    # state rather than the state from before the click.
    for history_trigger_btn in (generate_btn, regenerate_btn, humanize_btn):
        history_trigger_btn.click(get_history, inputs=[history], outputs=[history_chat])

# return demo


if __name__ == "__main__":
    # demo = create_interface()
    # Enable the request queue (at most 2 waiting, 2 running concurrently by
    # default), then serve on all interfaces at port 7890 with a public share link.
    queued_demo = demo.queue(max_size=2, default_concurrency_limit=2)
    queued_demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
    # demo.launch(server_name="0.0.0.0")