Spaces:

DSatishchandra
/

POExtraction_UC3

Runtime error

App Files Files Community

POExtraction_UC3 / app.py

DSatishchandra

Update app.py

4482309 verified about 2 months ago

raw

history blame

2.75 kB

	import pdfplumber
	import pandas as pd
	import gradio as gr
	import re

	# Define function to extract data
	def extract_data(pdf_file):
	    """Extract purchase-order line items from a PDF into an Excel workbook.

	    Every text line on every page is split on whitespace and handed to
	    ``_parse_line_item``; lines that do not look like a line item are
	    skipped silently.

	    NOTE: the purchase order number and date below, as well as the
	    material description, HSN code and IGST rate used by
	    ``_parse_line_item``, are hard-coded example values. They are not
	    read from the PDF.

	    Args:
	        pdf_file: Passed straight to ``pdfplumber.open`` (presumably a
	            path or binary file object coming from the Gradio upload
	            widget; verify against the installed Gradio version).

	    Returns:
	        The path of the workbook that was written. The path is fixed, so
	        every call overwrites the file produced by the previous call.
	    """
	    columns = [
	        "Purchase Order No", "Date", "SI No", "Material Number",
	        "Material Description", "HSN Code", "IGST", "Unit", "Quantity",
	        "Dely Qty", "Dely Date", "Unit Rate", "Value",
	    ]

	    # Example Purchase Order Details (Adjust accordingly)
	    purchase_order_no = "7200018552"
	    purchase_order_date = "28.09.2024"

	    data = []
	    with pdfplumber.open(pdf_file) as pdf:
	        for page in pdf.pages:
	            # A page without a text layer (e.g. a scanned image) can yield
	            # None instead of a string on some pdfplumber versions; treat
	            # it as an empty page rather than crashing on .splitlines().
	            page_text = page.extract_text() or ""
	            for line in page_text.splitlines():
	                item = _parse_line_item(line.split())
	                if item is not None:
	                    data.append(
	                        [purchase_order_no, purchase_order_date, *item]
	                    )

	    # Convert to DataFrame with specified columns
	    df = pd.DataFrame(data, columns=columns)
	    excel_path = "/tmp/Extracted_Purchase_Order_Data.xlsx"
	    df.to_excel(excel_path, index=False)
	    return excel_path


	def _parse_line_item(parts):
	    """Parse the whitespace-split tokens of one text line into row fields.

	    A line counts as a line item when its first token is an integer that
	    is a multiple of 10 and tokens 4-9 can be read as unit, quantity,
	    delivered quantity, delivery date, unit rate and value.

	    Args:
	        parts: The tokens of a single text line (``line.split()``).

	    Returns:
	        ``[si_no, material_number, material_desc, hsn_code, igst, unit,
	        quantity, dely_qty, dely_date, unit_rate, value]``, or ``None``
	        when the line does not match the expected layout.
	    """
	    try:
	        si_no = int(parts[0])  # Extract SI No
	        if si_no % 10 != 0:  # Assuming SI numbers are in multiples of 10
	            return None
	        # Token 3 is only trusted when the line contains the literal word
	        # "Material"; otherwise a default material number is used.
	        material_number = parts[3] if "Material" in parts else "220736540000"
	        unit = parts[4]
	        quantity = int(parts[5])
	        dely_qty = int(parts[6])
	        dely_date = parts[7]
	        unit_rate = float(parts[8])
	        value = float(parts[9])
	    except (ValueError, IndexError):
	        # Too few tokens or a non-numeric field: not a line-item row.
	        return None

	    return [
	        si_no,
	        material_number,
	        "BPS 017507",  # Material description: fixed example value
	        "8310",  # Fixed HSN Code
	        "18%",  # Fixed IGST
	        unit,
	        quantity,
	        dely_qty,
	        dely_date,
	        unit_rate,
	        value,
	    ]

	# Gradio UI: a single PDF upload in, a single Excel download out.
	pdf_upload = gr.File(label="Upload PDF")
	excel_download = gr.File(label="Download Excel")
	iface = gr.Interface(
	    fn=extract_data,
	    inputs=pdf_upload,
	    outputs=excel_download,
	    title="PDF Data Extractor",
	)

	# Start serving the interface as soon as the script is executed.
	iface.launch()