# NOTE: this file was captured from a hosted "Spaces" page whose status was
# "Runtime error"; reported file size: 759 bytes.
# Commits shown in the page's blame gutter: d2251a9, 80b61aa, 2659c0a.
import pandas as pd
import tabula
def extract_data(pdf_file):
    """Extract every table from a PDF and return them as one DataFrame.

    All tables that tabula detects on all pages are stacked row-wise, the
    columns are renamed to the fixed purchase-order layout below, and rows
    that are entirely empty are dropped.

    Args:
        pdf_file: The PDF to read; passed straight to ``tabula.read_pdf``.

    Returns:
        A pandas DataFrame with the nine purchase-order columns. The index
        is the concatenated 0..n-1 index minus any dropped all-empty rows.

    Raises:
        ValueError: If no tables are found, or if the combined table does
            not have exactly as many columns as the expected layout.
    """
    expected_columns = [
        'Purchase Order No',
        'Date',
        'Material Description',
        'Unit',
        'Quantity',
        'Dely Qty',
        'Dely Date',
        'Unit Rate',
        'Value',
    ]

    # Extract data from the PDF file using tabula (one DataFrame per table).
    tables = tabula.read_pdf(pdf_file, pages='all')

    # pd.concat([]) fails with an opaque "No objects to concatenate";
    # report the real cause instead (same exception type as before).
    if not tables:
        raise ValueError("No tables found in {!r}".format(pdf_file))

    # Combine the extracted tables into a single DataFrame.
    data = pd.concat(tables, ignore_index=True)

    # Renaming is positional, so a column-count mismatch would otherwise
    # surface as a cryptic pandas length-mismatch error.
    if len(data.columns) != len(expected_columns):
        raise ValueError(
            "Expected {} columns but extracted {} from {!r}: {}".format(
                len(expected_columns),
                len(data.columns),
                pdf_file,
                list(data.columns),
            )
        )

    # Rename columns to match the expected output format.
    data.columns = expected_columns

    # Remove rows in which every cell is empty.
    data = data.dropna(how='all')
    return data
if __name__ == "__main__":
    import sys

    # Optional command-line arguments: [input PDF] [output Excel file].
    # Both fall back to the original hard-coded names when omitted, so
    # running the script with no arguments behaves as before.
    cli_args = sys.argv[1:]
    pdf_file = cli_args[0] if len(cli_args) > 0 else 'your_pdf_file.pdf'
    output_file = cli_args[1] if len(cli_args) > 1 else 'output.xlsx'

    data = extract_data(pdf_file)

    # Save the extracted data to an Excel file, omitting the index column.
    data.to_excel(output_file, index=False)