Spaces:

DSatishchandra
/

PMP_PO_Extraction

Sleeping

App Files Files Community

PMP_PO_Extraction / app.py

DSatishchandra

Update app.py

615fc49 verified about 1 month ago

raw

history blame

1.82 kB

	import pdfplumber
	import pandas as pd
	import re
	import gradio as gr

	# Individual processing functions
	def process_bhel(pdf_file):
	    """Handle a PDF uploaded under the "BHEL" format choice.

	    There is no BHEL-specific parsing yet; the file goes through the
	    shared pipeline and the result is written to a BHEL-named workbook.

	    Args:
	        pdf_file: The uploaded PDF, forwarded unchanged to ``process_common``.

	    Returns:
	        Whatever ``process_common`` returns for this file and output name.
	    """
	    output_name = "BHEL Output.xlsx"
	    return process_common(pdf_file, output_name)

	def process_federal_electric(pdf_file):
	    """Handle a PDF uploaded under the "Federal Electric" format choice.

	    There is no Federal Electric-specific parsing yet; the file goes
	    through the shared pipeline and the result is written to a
	    Federal Electric-named workbook.

	    Args:
	        pdf_file: The uploaded PDF, forwarded unchanged to ``process_common``.

	    Returns:
	        Whatever ``process_common`` returns for this file and output name.
	    """
	    output_name = "Federal Electric Output.xlsx"
	    return process_common(pdf_file, output_name)

	def process_al_nisf(pdf_file):
	    """Handle a PDF uploaded under the "AL-NISF" format choice.

	    There is no AL-NISF-specific parsing yet; the file goes through the
	    shared pipeline and the result is written to an AL-NISF-named workbook.

	    Args:
	        pdf_file: The uploaded PDF, forwarded unchanged to ``process_common``.

	    Returns:
	        Whatever ``process_common`` returns for this file and output name.
	    """
	    output_name = "AL-NISF Output.xlsx"
	    return process_common(pdf_file, output_name)

	def process_common(pdf_file, output_name):
	    """
	    Extract the text of every page of a PDF and save it to an Excel file.

	    The page texts are concatenated in page order (no separator is inserted)
	    and stored in a single-row sheet with one column named "Text".

	    Args:
	        pdf_file: Either a filesystem path string, or an object whose
	            ``name`` attribute holds the path of the PDF to read.
	        output_name: Path of the Excel workbook to write.

	    Returns:
	        ``output_name``, unchanged, so the caller can offer it for download.
	    """
	    # Accept a plain path string as well as a file wrapper exposing `.name`.
	    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

	    with pdfplumber.open(pdf_path) as pdf:
	        # extract_text() can yield None for a page with no extractable text
	        # (e.g. a scanned image); substitute "" so the join does not fail.
	        text = "".join(page.extract_text() or "" for page in pdf.pages)

	    # Placeholder output: one row holding the whole document text.
	    df = pd.DataFrame([{"Text": text}])
	    df.to_excel(output_name, index=False)
	    return output_name

	# Dropdown processing function
	def main_process(pdf_file, format_choice):
	    """Route an uploaded PDF to the processor matching the chosen format.

	    Args:
	        pdf_file: The uploaded PDF, or ``None`` when nothing was uploaded.
	        format_choice: One of "BHEL", "Federal Electric" or "AL-NISF".

	    Returns:
	        The value returned by the selected processor, or ``None`` when no
	        file was supplied or the format is not recognised.
	    """
	    # Without this guard a missing upload crashes further down when the
	    # processor tries to read the file's path.
	    if pdf_file is None:
	        return None

	    if format_choice == "BHEL":
	        return process_bhel(pdf_file)
	    if format_choice == "Federal Electric":
	        return process_federal_electric(pdf_file)
	    if format_choice == "AL-NISF":
	        return process_al_nisf(pdf_file)

	    # Unknown or unselected format: nothing to produce.
	    return None

	# Gradio UI: a PDF upload and a format selector go in, a downloadable
	# file comes out; main_process does the routing.
	iface = gr.Interface(
	    title="Consolidated PO Data Extractor",
	    description="Select the format and upload a PDF to extract and download the data.",
	    fn=main_process,
	    inputs=[
	        gr.File(file_types=[".pdf"], label="Upload PDF"),
	        gr.Dropdown(
	            label="Select Format",
	            choices=["BHEL", "Federal Electric", "AL-NISF"],
	        ),
	    ],
	    outputs=gr.File(label="Download Processed File"),
	)

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    iface.launch()