neerajkalyank committed on
Commit
3d13654
·
verified ·
1 Parent(s): 183531d

Update toshiba.py

Browse files
Files changed (1) hide show
  1. toshiba.py +14 -6
toshiba.py CHANGED
@@ -11,6 +11,7 @@ def extract_toshiba_data(pdf_file):
11
  for page in pdf.pages:
12
  text = page.extract_text().splitlines()
13
 
 
14
  if not purchase_order or not order_date:
15
  for line in text:
16
  po_match = re.search(r'Purchase Order\s*:\s*(P\d+)', line)
@@ -20,20 +21,27 @@ def extract_toshiba_data(pdf_file):
20
  if date_match:
21
  order_date = date_match.group(1)
22
 
 
23
  for line in text:
24
  parts = line.split()
25
  try:
26
  pos = int(parts[0])
27
  if 10 <= pos <= 450:
28
  item_code = parts[1]
29
- quantity = float(parts[4])
30
- basic_price = float(parts[5])
31
- sub_total = float(parts[-1])
32
- data.append([purchase_order, order_date, pos, item_code, quantity, basic_price, sub_total])
 
 
 
33
  except (ValueError, IndexError):
34
  continue
35
 
36
- df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Quantity", "Basic Price", "Sub Total"])
 
 
 
37
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
38
  df.to_excel(temp_file.name, index=False)
39
- return temp_file.name
 
11
  for page in pdf.pages:
12
  text = page.extract_text().splitlines()
13
 
14
+ # Extract Purchase Order and Order Date if not already found
15
  if not purchase_order or not order_date:
16
  for line in text:
17
  po_match = re.search(r'Purchase Order\s*:\s*(P\d+)', line)
 
21
  if date_match:
22
  order_date = date_match.group(1)
23
 
24
+ # Extract item details
25
  for line in text:
26
  parts = line.split()
27
  try:
28
  pos = int(parts[0])
29
  if 10 <= pos <= 450:
30
  item_code = parts[1]
31
+ unit = parts[3] # Assuming unit appears in a fixed position
32
+ delivery_date = parts[4] # Assuming delivery date is next
33
+ quantity = float(parts[5])
34
+ basic_price = float(parts[6])
35
+ amount = quantity * basic_price
36
+ sub_total = float(parts[-1]) # Assuming subtotal is the last item on the line
37
+ data.append([purchase_order, order_date, pos, item_code, unit, delivery_date, quantity, basic_price, amount, sub_total])
38
  except (ValueError, IndexError):
39
  continue
40
 
41
+ # Define DataFrame with the new structure
42
+ df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Unit", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
43
+
44
+ # Save to Excel file
45
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
46
  df.to_excel(temp_file.name, index=False)
47
+ return temp_file.name