File size: 2,933 Bytes
12d2529
706a020
 
 
12d2529
a04c0d5
12d2529
e8260de
a04c0d5
 
e8260de
 
 
 
 
 
 
 
 
 
 
 
a04c0d5
 
 
 
 
 
 
 
 
 
 
 
 
e8260de
 
 
a04c0d5
 
 
 
 
 
e8260de
 
 
a04c0d5
 
e8260de
 
12d2529
 
e8260de
12d2529
 
 
 
e8260de
 
12d2529
e8260de
 
12d2529
 
 
 
 
 
 
e8260de
 
 
 
 
12d2529
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from annotations import analyze_pdf
import io

def _format_issues(issues):
    """Render the analyzer's issue report as a Markdown string.

    Args:
        issues: Mapping read for ``total_issues`` and ``issues``; every entry
            of ``issues`` is read for ``message``, ``context``,
            ``suggestions``, ``category``, ``rule_id``, ``offset`` and
            ``length``.

    Returns:
        A success line when ``total_issues`` is 0, otherwise the total
        followed by one bullet list per issue.
    """
    if issues['total_issues'] == 0:
        return "✅ No language issues found. Great job!"

    # Collect fragments and join once instead of growing a string with +=.
    parts = [f"**Total Issues Found:** {issues['total_issues']}\n\n"]
    for idx, issue in enumerate(issues['issues'], start=1):
        suggestions = ', '.join(issue['suggestions']) if issue['suggestions'] else 'None'
        parts.append(
            f"**Issue {idx}:**\n"
            f"- **Message:** {issue['message']}\n"
            f"- **Context:** {issue['context']}\n"
            f"- **Suggestions:** {suggestions}\n"
            f"- **Category:** {issue['category']}\n"
            f"- **Rule ID:** {issue['rule_id']}\n"
            f"- **Offset:** {issue['offset']}\n"
            f"- **Length:** {issue['length']}\n\n"
        )
    return "".join(parts)


def process_pdf(file):
    """Analyze an uploaded PDF and build the two outputs shown in the UI.

    Args:
        file: Raw bytes of the uploaded PDF, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(message, download)`` tuple. ``message`` is Markdown text: either
        the formatted issue report or a line starting with "❌" describing why
        the analysis did not run or failed. ``download`` is ``None`` on any
        failure or when the analyzer produced no annotated PDF; otherwise it
        is a dict with ``name`` and ``data`` keys.

    Raises:
        Nothing is propagated for ``Exception`` subclasses: every failure in
        the analysis path is converted into an error message.
    """
    MAX_SIZE = 10 * 1024 * 1024  # 10MB
    if file is None:
        return "❌ No file uploaded.", None
    if len(file) > MAX_SIZE:
        return "❌ File size exceeds the 10MB limit.", None

    try:
        # Wrap the binary data in BytesIO to make it file-like.
        issues, annotated_pdf = analyze_pdf(io.BytesIO(file))

        if "error" in issues:
            return f"❌ Error: {issues['error']}", None

        issues_display = _format_issues(issues)

        if not annotated_pdf:
            return issues_display, None

        # NOTE(review): the file output component may expect a file path
        # rather than a dict, depending on the installed Gradio version --
        # confirm against the version this app is deployed with.
        return issues_display, {
            "name": "annotated_document.pdf",
            "data": annotated_pdf,
        }
    except Exception as e:
        # `language_tool_python` is not imported in this module, so naming
        # `language_tool_python.LanguageToolError` in an `except` clause
        # raised NameError while the handler was being matched and masked the
        # real failure. Recognise that error by class name instead, which
        # needs no import and also covers subclasses.
        if any(cls.__name__ == "LanguageToolError" for cls in type(e).__mro__):
            return f"❌ LanguageTool Error: {str(e)}", None
        return f"❌ An unexpected error occurred: {str(e)}", None

# Build the UI: upload and trigger on the left, results on the right.
# The emoji in the labels below were mis-decoded (mojibake) and are restored.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Language Issue Analyzer")
    gr.Markdown("Upload a PDF to analyze language issues and receive an annotated PDF.")

    with gr.Row():
        with gr.Column():
            # type="binary" hands the upload to process_pdf as raw bytes.
            pdf_input = gr.File(label="📂 Upload PDF", type="binary")
            analyze_button = gr.Button("🔍 Analyze PDF")
        with gr.Column():
            issues_output = gr.Markdown(label="📝 Language Issues")
            annotated_pdf_output = gr.File(label="💾 Download Annotated PDF")

    # process_pdf returns (message, download), matching the two outputs in order.
    analyze_button.click(
        fn=process_pdf,
        inputs=pdf_input,
        outputs=[issues_output, annotated_pdf_output]
    )

    gr.Markdown("""
    ---
    **Note:** The annotated PDF highlights the detected language issues. Click the download link to view the annotated document.
    """)

# Start the web server for the app.
demo.launch()