Spaces:
Runtime error
Runtime error
File size: 849 Bytes
dfc33ee 310e3a8 dfc33ee 310e3a8 dfc33ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import pdfplumber
import pandas as pd
def parse_toshiba_pdf(pdf_path) -> pd.DataFrame:
    """Extract purchase-order line items from a PDF into a DataFrame.

    Each page is scanned with ``page.extract_table()``. The first row of
    every extracted table is treated as a header and dropped. Remaining
    rows are kept only when their cell count equals the number of expected
    columns; rows of any other width are reported on stdout and skipped.

    Args:
        pdf_path: Passed unchanged to ``pdfplumber.open``.

    Returns:
        A DataFrame whose columns are the names listed in ``columns`` and
        whose rows are the accepted table rows in page order. The frame is
        empty (but still has the columns) when no row was accepted.
    """
    columns = [
        "Purchase Order", "Order Date", "Pos", "Item Code",
        "Description", "Unit", "Delivery Date",
        "Quantity", "Basic Price", "Discount", "Cur", "Amount", "Sub Total",
    ]
    # Derive the accepted row width from the column list so the two cannot
    # drift apart if a column is added or removed later.
    expected_width = len(columns)

    data = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            table = page.extract_table()
            if not table:
                # Nothing usable was extracted from this page.
                continue

            # NOTE(review): the first row is dropped on every page; this
            # assumes each page's table repeats the header -- confirm
            # against real input, otherwise a data row is lost per page.
            for row in table[1:]:
                if len(row) == expected_width:
                    data.append(row)
                else:
                    print(f"Skipping row due to column mismatch: {row}")

    # Build the frame once, after all pages have been read.
    return pd.DataFrame(data, columns=columns)
|