File size: 849 Bytes
dfc33ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310e3a8
 
 
 
 
dfc33ee
310e3a8
dfc33ee
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import pdfplumber
import pandas as pd

def parse_toshiba_pdf(pdf_path) -> pd.DataFrame:
    """Extract purchase-order table rows from a PDF into a DataFrame.

    Every page is scanned with ``page.extract_table()``. The first row of
    each extracted table is treated as a header and dropped; the remaining
    rows are kept only when their cell count equals the number of expected
    columns. Rows of any other width are reported on stdout and skipped.

    Args:
        pdf_path: Location of the PDF; passed unchanged to
            ``pdfplumber.open``.

    Returns:
        A DataFrame with one row per accepted table row, labelled with the
        names in ``columns`` below. Cell values are stored exactly as
        pdfplumber returned them (no type conversion is done here).
    """
    columns = [
        "Purchase Order", "Order Date", "Pos", "Item Code",
        "Description", "Unit", "Delivery Date",
        "Quantity", "Basic Price", "Discount", "Cur", "Amount", "Sub Total",
    ]
    # Derive the accepted row width from the column list instead of repeating
    # a literal, so the filter and the DataFrame header cannot drift apart.
    expected_width = len(columns)
    data = []

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            table = page.extract_table()
            if not table:
                # Nothing usable was extracted from this page.
                continue

            # NOTE(review): the first row is dropped on *every* page. If a
            # table continues onto a page without a repeated header, its
            # first data row is lost -- confirm against real documents.
            for row in table[1:]:
                if len(row) == expected_width:
                    data.append(row)
                else:
                    print(f"Skipping row due to column mismatch: {row}")

    return pd.DataFrame(data, columns=columns)