File size: 2,342 Bytes
f7f8ecb
 
 
 
 
 
 
 
 
 
 
 
 
3d13654
f7f8ecb
 
 
 
 
 
 
 
 
4fcec9d
f7f8ecb
4fcec9d
 
feb7479
4fcec9d
 
 
 
 
 
 
 
f7f8ecb
4fcec9d
feb7479
 
4fcec9d
3d13654
 
f7f8ecb
 
3d13654
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pdfplumber
import pandas as pd
import re
import tempfile

def extract_toshiba_data(pdf_file):
    data = []
    purchase_order, order_date = None, None

    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text = page.extract_text().splitlines()

            # Extract Purchase Order and Order Date if not already found
            if not purchase_order or not order_date:
                for line in text:
                    po_match = re.search(r'Purchase Order\s*:\s*(P\d+)', line)
                    date_match = re.search(r'Order Date\s*:\s*([\d-]+)', line)
                    if po_match:
                        purchase_order = po_match.group(1)
                    if date_match:
                        order_date = date_match.group(1)

            # Extract item details using patterns
            for line in text:
                # Match each line with expected pattern for item rows
                item_match = re.match(r'(\d+)\s+(\d+)\s+(.*?)\s+([\d-]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)', line)
                if item_match:
                    pos = int(item_match.group(1))                   # Position number
                    item_code = item_match.group(2)                 # Item Code
                    item_name = item_match.group(3).strip()         # Item Name/Description (if available)
                    delivery_date = item_match.group(4)             # Delivery Date
                    quantity = float(item_match.group(5))           # Quantity
                    basic_price = float(item_match.group(6))        # Basic Price
                    amount = float(item_match.group(7))             # Calculated Amount
                    sub_total = float(item_match.group(8))          # Subtotal or final price

                    data.append([purchase_order, order_date, pos, item_code, item_name, delivery_date, quantity, basic_price, amount, sub_total])

    # Define DataFrame with the corrected structure
    df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Item Name", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
    
    # Save to Excel file
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
    df.to_excel(temp_file.name, index=False)
    return temp_file.name