pdf-annotator / app.py
samyak152002's picture
Update app.py
a04c0d5 verified
import gradio as gr
from annotations import analyze_pdf
import io
def _format_issue(idx, issue):
    """Render one issue mapping as a Markdown section headed by its 1-based index."""
    suggestions = ', '.join(issue['suggestions']) if issue['suggestions'] else 'None'
    return (
        f"**Issue {idx}:**\n"
        f"- **Message:** {issue['message']}\n"
        f"- **Context:** {issue['context']}\n"
        f"- **Suggestions:** {suggestions}\n"
        f"- **Category:** {issue['category']}\n"
        f"- **Rule ID:** {issue['rule_id']}\n"
        f"- **Offset:** {issue['offset']}\n"
        f"- **Length:** {issue['length']}\n\n"
    )


def process_pdf(file):
    """Analyze an uploaded PDF and build the two values shown in the UI.

    Args:
        file: Raw bytes of the uploaded PDF, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(markdown_text, download)`` tuple. ``markdown_text`` is either an
        error message or the formatted list of issues. ``download`` is ``None``
        on any error or when ``analyze_pdf`` yields no annotated PDF;
        otherwise it is a dict with ``"name"`` and ``"data"`` keys.

    Every failure is reported through the returned message; this function
    does not raise for errors coming out of the analysis step.
    """
    MAX_SIZE = 10 * 1024 * 1024  # 10MB

    # Guard clauses: reject missing or oversized uploads before doing any work.
    if file is None:
        return "❌ No file uploaded.", None
    if len(file) > MAX_SIZE:
        return "❌ File size exceeds the 10MB limit.", None

    try:
        # analyze_pdf is handed a file-like object, so wrap the raw bytes.
        issues, annotated_pdf = analyze_pdf(io.BytesIO(file))

        if "error" in issues:
            return f"❌ Error: {issues['error']}", None

        if issues['total_issues'] == 0:
            issues_display = "✅ No language issues found. Great job!"
        else:
            header = f"**Total Issues Found:** {issues['total_issues']}\n\n"
            issues_display = header + "".join(
                _format_issue(idx, issue)
                for idx, issue in enumerate(issues['issues'], start=1)
            )

        if annotated_pdf:
            # NOTE(review): a {'name', 'data'} dict is what the original code
            # returned for the download component; confirm that the installed
            # Gradio version accepts this shape for a File output.
            return issues_display, {
                "name": "annotated_document.pdf",
                "data": annotated_pdf,
            }
        return issues_display, None
    except Exception as e:
        # This file never imports language_tool_python, so naming
        # language_tool_python.LanguageToolError in an `except` clause would
        # itself raise NameError while an error is being handled. Identify the
        # exception by class name (including base classes) instead.
        if any(cls.__name__ == "LanguageToolError" for cls in type(e).__mro__):
            return f"❌ LanguageTool Error: {str(e)}", None
        return f"❌ An unexpected error occurred: {str(e)}", None
# Build the Gradio interface: upload + button on the left, results on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Language Issue Analyzer")
    gr.Markdown("Upload a PDF to analyze language issues and receive an annotated PDF.")

    with gr.Row():
        with gr.Column():
            # type="binary" hands the upload to process_pdf as raw bytes.
            pdf_input = gr.File(label="📂 Upload PDF", type="binary")
            analyze_button = gr.Button("🔍 Analyze PDF")
        with gr.Column():
            issues_output = gr.Markdown(label="📝 Language Issues")
            annotated_pdf_output = gr.File(label="💾 Download Annotated PDF")

    # process_pdf returns (markdown_text, download), matching the two outputs.
    analyze_button.click(
        fn=process_pdf,
        inputs=pdf_input,
        outputs=[issues_output, annotated_pdf_output],
    )

    gr.Markdown(
        "\n"
        "---\n"
        "**Note:** The annotated PDF highlights the detected language issues. "
        "Click the download link to view the annotated document.\n"
    )

# Start the web server for the app.
demo.launch()