Spaces:

samyak152002
/

pdf-annotator

Sleeping

App Files Files Community

pdf-annotator / app.py

samyak152002

Update app.py

a04c0d5 verified 18 days ago

raw

history blame contribute delete

2.93 kB

	import gradio as gr
	from annotations import analyze_pdf
	import io

	def process_pdf(file):
	    """Analyze an uploaded PDF for language issues.

	    Args:
	        file: Raw bytes of the uploaded PDF (the input component is declared
	            with ``type="binary"``), or ``None`` when nothing was uploaded.

	    Returns:
	        A ``(report, download)`` tuple. ``report`` is the text shown to the
	        user: either the issue listing or a message starting with "❌".
	        ``download`` is a ``{"name", "data"}`` dict wrapping the annotated
	        PDF returned by ``analyze_pdf``, or ``None`` when there is nothing
	        to offer for download.

	    Any exception raised during analysis or formatting is converted into an
	    error message instead of propagating to the caller.
	    """
	    max_size = 10 * 1024 * 1024  # 10MB
	    if file is None:
	        return "❌ No file uploaded.", None
	    if len(file) > max_size:
	        return "❌ File size exceeds the 10MB limit.", None

	    try:
	        # Wrap the binary data in BytesIO to make it file-like.
	        issues, annotated_pdf = analyze_pdf(io.BytesIO(file))

	        if "error" in issues:
	            return f"❌ Error: {issues['error']}", None

	        issues_display = _format_issues(issues)

	        if not annotated_pdf:
	            return issues_display, None

	        # NOTE(review): depending on the installed Gradio version, a File
	        # output may expect a file path rather than a name/data dict --
	        # confirm against the deployed version before changing this shape.
	        return issues_display, {
	            "name": "annotated_document.pdf",
	            "data": annotated_pdf,
	        }
	    except Exception as e:
	        # A dedicated `except language_tool_python.LanguageToolError` clause
	        # cannot be used here: that module is not imported in this file, so
	        # evaluating the clause raised NameError and hid the real error.
	        # Matching on the class name keeps the dedicated message without
	        # requiring the import.
	        if _is_language_tool_error(e):
	            return f"❌ LanguageTool Error: {e}", None
	        return f"❌ An unexpected error occurred: {e}", None


	def _is_language_tool_error(exc):
	    """Return True if *exc* is an instance of a class named LanguageToolError (or a subclass)."""
	    return any(cls.__name__ == "LanguageToolError" for cls in type(exc).__mro__)


	def _format_issues(issues):
	    """Render the analysis result dict as the text report shown to the user."""
	    if issues['total_issues'] == 0:
	        return "✅ No language issues found. Great job!"

	    parts = [f"Total Issues Found: {issues['total_issues']}\n\n"]
	    for idx, issue in enumerate(issues['issues'], start=1):
	        suggestions = ', '.join(issue['suggestions']) if issue['suggestions'] else 'None'
	        parts.append(
	            f"Issue {idx}:\n"
	            f"- Message: {issue['message']}\n"
	            f"- Context: {issue['context']}\n"
	            f"- Suggestions: {suggestions}\n"
	            f"- Category: {issue['category']}\n"
	            f"- Rule ID: {issue['rule_id']}\n"
	            f"- Offset: {issue['offset']}\n"
	            f"- Length: {issue['length']}\n\n"
	        )
	    return "".join(parts)

	# Assemble the page: a title and short instructions, a row of two columns
	# (upload + trigger button, then the results), and a closing note.
	with gr.Blocks() as demo:
	    gr.Markdown("# 📄 PDF Language Issue Analyzer")
	    gr.Markdown("Upload a PDF to analyze language issues and receive an annotated PDF.")

	    with gr.Row():
	        # First column: the upload widget and the button that starts analysis.
	        with gr.Column():
	            pdf_input = gr.File(label="📂 Upload PDF", type="binary")
	            analyze_button = gr.Button("🔍 Analyze PDF")
	        # Second column: the textual report and the annotated file.
	        with gr.Column():
	            issues_output = gr.Markdown(label="📝 Language Issues")
	            annotated_pdf_output = gr.File(label="💾 Download Annotated PDF")

	    # process_pdf returns (report, download); the outputs are listed in that order.
	    analyze_button.click(process_pdf, inputs=pdf_input, outputs=[issues_output, annotated_pdf_output])

	    gr.Markdown("""
	---
	Note: The annotated PDF highlights the detected language issues. Click the download link to view the annotated document.
	""")

	# Start serving the interface as soon as this module is executed.
	demo.launch()