Spaces:

samyak152002
/

pdf-annotator

Sleeping

File size: 2,933 Bytes

import gradio as gr
from annotations import analyze_pdf
import io

def _format_issues(issues):
    """Render the analysis result dict as Markdown text for display.

    Reads ``issues['total_issues']`` and, when it is non-zero, every entry of
    ``issues['issues']`` (keys: message, context, suggestions, category,
    rule_id, offset, length). A missing key raises ``KeyError``.
    """
    if issues['total_issues'] == 0:
        return "✅ No language issues found. Great job!"

    # Collect fragments and join once instead of repeated string +=.
    parts = [f"**Total Issues Found:** {issues['total_issues']}\n\n"]
    for idx, issue in enumerate(issues['issues'], start=1):
        suggestions = (
            ', '.join(issue['suggestions']) if issue['suggestions'] else 'None'
        )
        parts.append(f"**Issue {idx}:**\n")
        parts.append(f"- **Message:** {issue['message']}\n")
        parts.append(f"- **Context:** {issue['context']}\n")
        parts.append(f"- **Suggestions:** {suggestions}\n")
        parts.append(f"- **Category:** {issue['category']}\n")
        parts.append(f"- **Rule ID:** {issue['rule_id']}\n")
        parts.append(f"- **Offset:** {issue['offset']}\n")
        parts.append(f"- **Length:** {issue['length']}\n\n")
    return "".join(parts)


def process_pdf(file):
    """Analyze an uploaded PDF and build the two values shown in the UI.

    Args:
        file: The uploaded PDF as a bytes-like object, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(message, annotated)`` tuple. ``message`` is the Markdown report,
        or a line starting with "❌" describing why analysis failed.
        ``annotated`` is ``None`` on failure or when ``analyze_pdf`` returned
        no annotated document; otherwise a dict with ``"name"`` and ``"data"``
        keys wrapping that document.

    Errors raised while analyzing or formatting are reported through
    ``message`` rather than propagated.
    """
    MAX_SIZE = 10 * 1024 * 1024  # 10MB
    if file is None:
        return "❌ No file uploaded.", None
    if len(file) > MAX_SIZE:
        return "❌ File size exceeds the 10MB limit.", None

    try:
        # Wrap the binary data in BytesIO to make it file-like
        file_like = io.BytesIO(file)

        # Analyze the PDF
        issues, annotated_pdf = analyze_pdf(file_like)

        if "error" in issues:
            return f"❌ Error: {issues['error']}", None

        # Prepare issues for display
        issues_display = _format_issues(issues)

        # Prepare annotated PDF for download
        if annotated_pdf:
            # Return a dictionary with 'name' and 'data' keys
            # NOTE(review): gr.File outputs are usually given a file path;
            # confirm the installed Gradio version accepts this dict shape.
            annotated_pdf_dict = {
                "name": "annotated_document.pdf",
                "data": annotated_pdf
            }
            return issues_display, annotated_pdf_dict
        return issues_display, None

    except Exception as e:
        # `language_tool_python` is not imported in this module, so naming
        # `language_tool_python.LanguageToolError` in an except clause would
        # itself raise NameError while handling any error. Recognise the
        # exception (and its subclasses) by class name instead.
        is_language_tool_error = any(
            cls.__name__ == "LanguageToolError" for cls in type(e).__mro__
        )
        if is_language_tool_error:
            return f"❌ LanguageTool Error: {str(e)}", None
        return f"❌ An unexpected error occurred: {str(e)}", None

# Build the UI: a title and description, then one row with two columns
# (upload + trigger button on one side, results on the other), then a footer note.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Language Issue Analyzer")
    gr.Markdown("Upload a PDF to analyze language issues and receive an annotated PDF.")

    with gr.Row():
        with gr.Column():
            # type="binary": process_pdf calls len() and io.BytesIO() on the
            # value, so it expects the upload as raw bytes rather than a path.
            pdf_input = gr.File(label="📂 Upload PDF", type="binary")
            analyze_button = gr.Button("🔍 Analyze PDF")
        with gr.Column():
            issues_output = gr.Markdown(label="📝 Language Issues")
            annotated_pdf_output = gr.File(label="💾 Download Annotated PDF")

    # process_pdf returns a 2-tuple; its items map, in order, onto the two
    # output components listed here.
    analyze_button.click(
        fn=process_pdf,
        inputs=pdf_input,
        outputs=[issues_output, annotated_pdf_output]
    )

    gr.Markdown("""
    ---
    **Note:** The annotated PDF highlights the detected language issues. Click the download link to view the annotated document.
    """)

# Launched unconditionally at module level, so importing this module also
# launches the app.
demo.launch()