Spaces:
Runtime error
Runtime error
neerajkalyank
committed on
Update toshiba.py
Browse files- toshiba.py +14 -6
toshiba.py
CHANGED
@@ -11,6 +11,7 @@ def extract_toshiba_data(pdf_file):
|
|
11 |
for page in pdf.pages:
|
12 |
text = page.extract_text().splitlines()
|
13 |
|
|
|
14 |
if not purchase_order or not order_date:
|
15 |
for line in text:
|
16 |
po_match = re.search(r'Purchase Order\s*:\s*(P\d+)', line)
|
@@ -20,20 +21,27 @@ def extract_toshiba_data(pdf_file):
|
|
20 |
if date_match:
|
21 |
order_date = date_match.group(1)
|
22 |
|
|
|
23 |
for line in text:
|
24 |
parts = line.split()
|
25 |
try:
|
26 |
pos = int(parts[0])
|
27 |
if 10 <= pos <= 450:
|
28 |
item_code = parts[1]
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
33 |
except (ValueError, IndexError):
|
34 |
continue
|
35 |
|
36 |
-
|
|
|
|
|
|
|
37 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
|
38 |
df.to_excel(temp_file.name, index=False)
|
39 |
-
return temp_file.name
|
|
|
11 |
for page in pdf.pages:
|
12 |
text = page.extract_text().splitlines()
|
13 |
|
14 |
+
# Extract Purchase Order and Order Date if not already found
|
15 |
if not purchase_order or not order_date:
|
16 |
for line in text:
|
17 |
po_match = re.search(r'Purchase Order\s*:\s*(P\d+)', line)
|
|
|
21 |
if date_match:
|
22 |
order_date = date_match.group(1)
|
23 |
|
24 |
+
# Extract item details
|
25 |
for line in text:
|
26 |
parts = line.split()
|
27 |
try:
|
28 |
pos = int(parts[0])
|
29 |
if 10 <= pos <= 450:
|
30 |
item_code = parts[1]
|
31 |
+
unit = parts[3] # Assuming unit appears in a fixed position
|
32 |
+
delivery_date = parts[4] # Assuming delivery date is next
|
33 |
+
quantity = float(parts[5])
|
34 |
+
basic_price = float(parts[6])
|
35 |
+
amount = quantity * basic_price
|
36 |
+
sub_total = float(parts[-1]) # Assuming subtotal is the last item on the line
|
37 |
+
data.append([purchase_order, order_date, pos, item_code, unit, delivery_date, quantity, basic_price, amount, sub_total])
|
38 |
except (ValueError, IndexError):
|
39 |
continue
|
40 |
|
41 |
+
# Define DataFrame with the new structure
|
42 |
+
df = pd.DataFrame(data, columns=["Purchase Order", "Order Date", "Pos", "Item Code", "Unit", "Delivery Date", "Quantity", "Basic Price", "Amount", "SUB TOTAL"])
|
43 |
+
|
44 |
+
# Save to Excel file
|
45 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
|
46 |
df.to_excel(temp_file.name, index=False)
|
47 |
+
return temp_file.name
|