Spaces:
Sleeping
Sleeping
import io
import os
import tempfile

import gradio as gr

from annotations import analyze_pdf
def _format_issues(issues):
    """Render the analysis result dict as the Markdown report shown in the UI.

    Reads ``issues['total_issues']`` and, when it is non-zero, each entry of
    ``issues['issues']`` (keys: message, context, suggestions, category,
    rule_id, offset, length).
    """
    if issues['total_issues'] == 0:
        return "β No language issues found. Great job!"

    # Collect the pieces and join once instead of growing a string with +=.
    parts = [f"**Total Issues Found:** {issues['total_issues']}\n\n"]
    for idx, issue in enumerate(issues['issues'], start=1):
        suggestions = ', '.join(issue['suggestions']) if issue['suggestions'] else 'None'
        parts.append(
            f"**Issue {idx}:**\n"
            f"- **Message:** {issue['message']}\n"
            f"- **Context:** {issue['context']}\n"
            f"- **Suggestions:** {suggestions}\n"
            f"- **Category:** {issue['category']}\n"
            f"- **Rule ID:** {issue['rule_id']}\n"
            f"- **Offset:** {issue['offset']}\n"
            f"- **Length:** {issue['length']}\n\n"
        )
    return "".join(parts)


def _store_annotated_pdf(annotated_pdf):
    """Write the annotated PDF to a temporary file and return its path.

    A ``gr.File`` output component is fed a file path, not a dict, so the
    payload has to exist on disk before it is handed back to Gradio.
    """
    # NOTE(review): the concrete type returned by analyze_pdf is not visible
    # here; bytes-like objects, file-like objects and ready-made paths are all
    # accepted - confirm against annotations.analyze_pdf.
    if isinstance(annotated_pdf, (str, os.PathLike)):
        return os.fspath(annotated_pdf)
    if hasattr(annotated_pdf, "getvalue"):
        payload = annotated_pdf.getvalue()
    elif hasattr(annotated_pdf, "read"):
        payload = annotated_pdf.read()
    else:
        payload = bytes(annotated_pdf)

    # A fresh directory per call keeps the download name stable
    # ("annotated_document.pdf") without concurrent requests overwriting
    # each other's files. The directory is not removed here.
    target = os.path.join(
        tempfile.mkdtemp(prefix="annotated_pdf_"), "annotated_document.pdf"
    )
    with open(target, "wb") as out:
        out.write(payload)
    return target


def _is_language_tool_error(exc):
    """Return True when *exc* is (a subclass of) a class named LanguageToolError.

    The check is by class name so this module does not have to import
    ``language_tool_python``, which the file never imports.
    """
    return any(cls.__name__ == "LanguageToolError" for cls in type(exc).__mro__)


def process_pdf(file):
    """Analyze an uploaded PDF and build the outputs for the Gradio UI.

    Args:
        file: Raw bytes of the uploaded PDF (the upload component is declared
            with ``type="binary"``), or ``None`` when nothing was uploaded.

    Returns:
        A ``(message, annotated_pdf_path)`` tuple. ``message`` is the Markdown
        report or an error description. ``annotated_pdf_path`` is the path of
        the annotated PDF on disk, or ``None`` when there is nothing to
        download.

    Failures inside the analysis are reported through ``message`` rather than
    raised.
    """
    # NOTE(review): the leading "β" in the messages below looks like a
    # mis-decoded emoji - confirm the intended glyph before changing it.
    MAX_SIZE = 10 * 1024 * 1024  # 10MB
    if file is None:
        return "β No file uploaded.", None
    if len(file) > MAX_SIZE:
        return "β File size exceeds the 10MB limit.", None

    try:
        # Wrap the binary data in BytesIO to make it file-like
        file_like = io.BytesIO(file)
        issues, annotated_pdf = analyze_pdf(file_like)
        if "error" in issues:
            return f"β Error: {issues['error']}", None

        issues_display = _format_issues(issues)
        if annotated_pdf:
            return issues_display, _store_annotated_pdf(annotated_pdf)
        return issues_display, None
    except Exception as e:
        # A single handler: naming an un-imported exception class in an
        # ``except`` clause would itself raise NameError while handling.
        if _is_language_tool_error(e):
            return f"β LanguageTool Error: {str(e)}", None
        return f"β An unexpected error occurred: {str(e)}", None
# Build the page: heading, a two-column row (upload + trigger on the left,
# results on the right), the click wiring, and a closing note.
# NOTE(review): nesting below is reconstructed from statement order - confirm
# against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(value="# π PDF Language Issue Analyzer")
    gr.Markdown(
        value="Upload a PDF to analyze language issues and receive an annotated PDF."
    )

    with gr.Row():
        with gr.Column():
            # type="binary" hands the handler the upload's raw bytes.
            pdf_input = gr.File(type="binary", label="π Upload PDF")
            analyze_button = gr.Button(value="π Analyze PDF")
        with gr.Column():
            issues_output = gr.Markdown(label="π Language Issues")
            annotated_pdf_output = gr.File(label="πΎ Download Annotated PDF")

    # One click runs the analysis and fills both result components.
    analyze_button.click(
        process_pdf,
        pdf_input,
        [issues_output, annotated_pdf_output],
    )

    gr.Markdown(
        "\n---\n"
        "**Note:** The annotated PDF highlights the detected language issues. "
        "Click the download link to view the annotated document.\n"
    )

demo.launch()