neerajkalyank committed on
Commit
4fcec9d
1 Parent(s): feb7479

Update toshiba.py

Browse files
Files changed (1) hide show
  1. toshiba.py +13 -12
toshiba.py CHANGED
@@ -21,23 +21,24 @@ def extract_toshiba_data(pdf_file):
21
  if date_match:
22
  order_date = date_match.group(1)
23
 
24
- # Extract item details
25
  for line in text:
26
- item_match = re.match(r'(\d+)\s+(\d+)\s+(NOS|PCS)\s+([\d-]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)', line)
 
27
  if item_match:
28
- pos = int(item_match.group(1))
29
- item_code = item_match.group(2)
30
- unit = item_match.group(3)
31
- delivery_date = item_match.group(4)
32
- quantity = float(item_match.group(5))
33
- basic_price = float(item_match.group(6))
34
- amount = quantity * basic_price
35
- sub_total = float(item_match.group(7))
36
 
37
- data.append([purchase_order, order_date, pos, item_code, unit, delivery_date, quantity, basic_price, amount, sub_total])
38
 
39
  # Define DataFrame with the corrected structure
40
- df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Unit", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
41
 
42
  # Save to Excel file
43
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
 
21
  if date_match:
22
  order_date = date_match.group(1)
23
 
24
+ # Extract item details using patterns
25
  for line in text:
26
+ # Match each line with expected pattern for item rows
27
+ item_match = re.match(r'(\d+)\s+(\d+)\s+(.*?)\s+([\d-]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)', line)
28
  if item_match:
29
+ pos = int(item_match.group(1)) # Position number
30
+ item_code = item_match.group(2) # Item Code
31
+ item_name = item_match.group(3).strip() # Item Name/Description (if available)
32
+ delivery_date = item_match.group(4) # Delivery Date
33
+ quantity = float(item_match.group(5)) # Quantity
34
+ basic_price = float(item_match.group(6)) # Basic Price
35
+ amount = float(item_match.group(7)) # Calculated Amount
36
+ sub_total = float(item_match.group(8)) # Subtotal or final price
37
 
38
+ data.append([purchase_order, order_date, pos, item_code, item_name, delivery_date, quantity, basic_price, amount, sub_total])
39
 
40
  # Define DataFrame with the corrected structure
41
+ df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Item Name", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
42
 
43
  # Save to Excel file
44
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")