neerajkalyank committed on
Commit
7ebbb35
1 Parent(s): 8755e00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -28
app.py CHANGED
@@ -12,49 +12,43 @@ def extract_data_from_pdf(pdf_file):
12
  for page in pdf.pages:
13
  text = page.extract_text()
14
 
15
- # Extract PO number
16
  if po_number is None:
17
  po_match = re.search(r"Purchase Order : (\w+)", text)
18
  po_number = po_match.group(1) if po_match else "N/A"
19
 
20
- # Regex pattern for row data
21
  row_pattern = re.compile(
22
- r"(\d+)\s+(\d+)\s+(\w+)\s+(\d{4}-\d{2}-\d{2})\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
23
  )
24
 
25
- # Extract matching rows
26
  for match in row_pattern.finditer(text):
27
- (
28
- pos,
29
- item_code,
30
- unit,
31
- delivery_date,
32
- quantity,
33
- basic_price,
34
- amount,
35
- ) = match.groups()
36
 
 
37
  sub_total_match = re.search(r"SUB TOTAL : ([\d.]+)", text)
38
  sub_total = sub_total_match.group(1) if sub_total_match else "0.0"
39
 
40
- data.append(
41
- {
42
- "Purchase Order": po_number,
43
- "Pos.": pos,
44
- "Item Code": item_code,
45
- "Unit": unit,
46
- "Delivery Date": delivery_date,
47
- "Quantity": quantity,
48
- "Basic Price": basic_price,
49
- "Amount": amount,
50
- "SUB TOTAL": sub_total,
51
- }
52
- )
53
 
54
  # Convert data to DataFrame and save to Excel
55
  df = pd.DataFrame(data)
56
- output = df.to_excel("output.xlsx", index=False)
57
- return "output.xlsx"
 
58
 
59
  # Gradio Interface
60
  iface = gr.Interface(
 
12
  for page in pdf.pages:
13
  text = page.extract_text()
14
 
15
+ # Extract PO number (only once at the start)
16
  if po_number is None:
17
  po_match = re.search(r"Purchase Order : (\w+)", text)
18
  po_number = po_match.group(1) if po_match else "N/A"
19
 
20
+ # Regex pattern for extracting rows
21
  row_pattern = re.compile(
22
+ r"(\d+)\s+(\d{9})\s+(\w+)\s+(\d{4}-\d{2}-\d{2})\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+INR\s+([\d.]+)"
23
  )
24
 
25
+ # Extract each row using the pattern
26
  for match in row_pattern.finditer(text):
27
+ pos, item_code, unit, delivery_date, quantity, basic_price, discount, amount = match.groups()
 
 
 
 
 
 
 
 
28
 
29
+ # Extract subtotal if present
30
  sub_total_match = re.search(r"SUB TOTAL : ([\d.]+)", text)
31
  sub_total = sub_total_match.group(1) if sub_total_match else "0.0"
32
 
33
+ # Append data for each matched row
34
+ data.append({
35
+ "Purchase Order": po_number,
36
+ "Pos.": pos,
37
+ "Item Code": item_code,
38
+ "Unit": unit,
39
+ "Delivery Date": delivery_date,
40
+ "Quantity": quantity,
41
+ "Basic Price": basic_price,
42
+ "Discount": discount,
43
+ "Amount": amount,
44
+ "SUB TOTAL": sub_total,
45
+ })
46
 
47
  # Convert data to DataFrame and save to Excel
48
  df = pd.DataFrame(data)
49
+ output_file = "output.xlsx"
50
+ df.to_excel(output_file, index=False)
51
+ return output_file
52
 
53
  # Gradio Interface
54
  iface = gr.Interface(