neerajkalyank committed on
Commit
feb7479
1 Parent(s): 3d13654

Update toshiba.py

Browse files
Files changed (1) hide show
  1. toshiba.py +13 -15
toshiba.py CHANGED
@@ -23,22 +23,20 @@ def extract_toshiba_data(pdf_file):
23
 
24
  # Extract item details
25
  for line in text:
26
- parts = line.split()
27
- try:
28
- pos = int(parts[0])
29
- if 10 <= pos <= 450:
30
- item_code = parts[1]
31
- unit = parts[3] # Assuming unit appears in a fixed position
32
- delivery_date = parts[4] # Assuming delivery date is next
33
- quantity = float(parts[5])
34
- basic_price = float(parts[6])
35
- amount = quantity * basic_price
36
- sub_total = float(parts[-1]) # Assuming subtotal is the last item on the line
37
- data.append([purchase_order, order_date, pos, item_code, unit, delivery_date, quantity, basic_price, amount, sub_total])
38
- except (ValueError, IndexError):
39
- continue
40
 
41
- # Define DataFrame with the new structure
 
 
42
  df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Unit", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
43
 
44
  # Save to Excel file
 
23
 
24
  # Extract item details
25
  for line in text:
26
+ item_match = re.match(r'(\d+)\s+(\d+)\s+(NOS|PCS)\s+([\d-]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)', line)
27
+ if item_match:
28
+ pos = int(item_match.group(1))
29
+ item_code = item_match.group(2)
30
+ unit = item_match.group(3)
31
+ delivery_date = item_match.group(4)
32
+ quantity = float(item_match.group(5))
33
+ basic_price = float(item_match.group(6))
34
+ amount = quantity * basic_price
35
+ sub_total = float(item_match.group(7))
 
 
 
 
36
 
37
+ data.append([purchase_order, order_date, pos, item_code, unit, delivery_date, quantity, basic_price, amount, sub_total])
38
+
39
+ # Define DataFrame with the corrected structure
40
  df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Unit", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
41
 
42
  # Save to Excel file