# Spaces: Runtime error
import pdfplumber | |
import pandas as pd | |
def parse_toshiba_pdf(pdf_path):
    """Extract purchase-order line items from a PDF into a DataFrame.

    Every page of the PDF is opened with ``pdfplumber`` and the table
    returned by ``page.extract_table()`` is read. The first row of each
    page's table is treated as a header and dropped. A remaining row is
    kept only when its cell count equals the number of expected columns;
    any other row is reported on stdout and discarded.

    Args:
        pdf_path: Location of the PDF, passed straight to
            ``pdfplumber.open``.

    Returns:
        A ``pandas.DataFrame`` holding the kept rows under the thirteen
        column names listed below, in page order.
    """
    columns = [
        "Purchase Order", "Order Date", "Pos", "Item Code",
        "Description", "Unit", "Delivery Date",
        "Quantity", "Basic Price", "Discount", "Cur", "Amount", "Sub Total"
    ]
    # A row is only usable when it has one cell per expected column (13).
    expected_width = len(columns)
    records = []

    with pdfplumber.open(pdf_path) as document:
        for page in document.pages:
            extracted = page.extract_table()
            if not extracted:
                # Nothing usable was extracted from this page.
                continue

            # The first row of each page's table is the header; skip it.
            for row in extracted[1:]:
                if len(row) != expected_width:
                    print(f"Skipping row due to column mismatch: {row}")
                    continue
                records.append(row)

    # Build the frame with the fixed column layout.
    return pd.DataFrame(records, columns=columns)