# Spaces status: Runtime error
import argparse

import pandas as pd
import tabula
def extract_data(pdf_file):
    """Extract the purchase-order tables of a PDF into one DataFrame.

    Every table tabula detects on any page of *pdf_file* is stacked into a
    single frame, the columns are renamed to the expected output layout,
    and rows that are entirely empty are dropped.

    Args:
        pdf_file: The PDF to read, passed unchanged to ``tabula.read_pdf``.

    Returns:
        A DataFrame with the nine expected columns. When tabula detects no
        tables at all, an empty DataFrame with those columns is returned.

    Raises:
        ValueError: If the extracted tables cannot be mapped onto the nine
            expected columns.
    """
    expected_columns = [
        'Purchase Order No', 'Date', 'Material Description', 'Unit',
        'Quantity', 'Dely Qty', 'Dely Date', 'Unit Rate', 'Value',
    ]

    # Extract data from the PDF file using tabula
    tables = tabula.read_pdf(pdf_file, pages='all')

    if not tables:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame that still has the expected layout instead.
        return pd.DataFrame(columns=expected_columns)

    if all(len(table.columns) == len(expected_columns) for table in tables):
        # Rename each table positionally BEFORE stacking. Headers are
        # inferred per table, so differing header text would make concat
        # align by label and spread one logical column over several.
        for table in tables:
            table.columns = expected_columns
        data = pd.concat(tables, ignore_index=True)
    else:
        # Combine the extracted tables into a single DataFrame
        data = pd.concat(tables, ignore_index=True)
        if len(data.columns) != len(expected_columns):
            raise ValueError(
                f"Expected {len(expected_columns)} columns after combining "
                f"the extracted tables, found {len(data.columns)}"
            )
        # Rename columns to match the expected output format
        data.columns = expected_columns

    # Remove rows in which every cell is empty
    return data.dropna(how='all')
if __name__ == "__main__":
    # Both paths are optional positional arguments; the defaults are the
    # values that used to be hard-coded, so running the script with no
    # arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Extract the tables of a PDF into an Excel workbook."
    )
    parser.add_argument(
        "pdf_file",
        nargs="?",
        default='your_pdf_file.pdf',
        help="PDF file to read (default: %(default)s)",
    )
    parser.add_argument(
        "output",
        nargs="?",
        default='output.xlsx',
        help="Excel file to write (default: %(default)s)",
    )
    args = parser.parse_args()

    data = extract_data(args.pdf_file)
    # Save the extracted data to an Excel file
    data.to_excel(args.output, index=False)