import logging

import pandas as pd
import pdfplumber

logger = logging.getLogger(__name__)


def parse_toshiba_pdf(pdf_path) -> pd.DataFrame:
    """Extract purchase-order line items from a PDF into a DataFrame.

    Each page of the PDF is scanned with ``page.extract_table()``. The
    first row of every extracted table is treated as a header and
    discarded; each remaining row is kept only if it has exactly as many
    cells as there are expected columns. Rows of any other width are
    skipped and reported through this module's logger at WARNING level.

    Args:
        pdf_path: Location of the PDF; passed unchanged to
            ``pdfplumber.open``.

    Returns:
        A DataFrame with one row per accepted table row and the fixed
        column set listed below. If no rows are accepted, the DataFrame
        is empty but still carries those columns.
    """
    columns = [
        "Purchase Order",
        "Order Date",
        "Pos",
        "Item Code",
        "Description",
        "Unit",
        "Delivery Date",
        "Quantity",
        "Basic Price",
        "Discount",
        "Cur",
        "Amount",
        "Sub Total",
    ]
    # Derive the accepted row width from the column list so the two can
    # never drift apart if a column is added or removed.
    expected_width = len(columns)

    data = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # NOTE(review): per the pdfplumber docs, extract_table() yields
            # only one table per page -- confirm that the source documents
            # never carry more than one table on a page.
            table = page.extract_table()
            if not table:
                # No table detected on this page.
                continue

            # The first row of each page's table is assumed to be the header.
            for row in table[1:]:
                if len(row) == expected_width:
                    data.append(row)
                else:
                    logger.warning(
                        "Skipping row due to column mismatch: %s", row
                    )

    return pd.DataFrame(data, columns=columns)