DSatishchandra committed on
Commit
4482309
1 Parent(s): 81fd83e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -1,15 +1,16 @@
1
  import pdfplumber
2
  import pandas as pd
3
  import gradio as gr
 
4
 
5
  # Define function to extract data
6
  def extract_data(pdf_file):
7
  data = []
8
  columns = ["Purchase Order No", "Date", "SI No", "Material Number", "Material Description", "HSN Code", "IGST", "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value"]
9
 
10
- # Example Purchase Order Details (Adjust accordingly or add dynamic extraction if possible)
11
- purchase_order_no = "PO12345"
12
- purchase_order_date = "04.11.2024"
13
 
14
  with pdfplumber.open(pdf_file) as pdf:
15
  for page in pdf.pages:
@@ -18,13 +19,12 @@ def extract_data(pdf_file):
18
  parts = line.split()
19
  try:
20
  si_no = int(parts[0]) # Extract SI No
21
- # Check if the line follows the expected format for a row
22
- if si_no % 10 == 0: # Assuming SI numbers are in multiples of 10 as per sample
23
- # Extract each field based on position and format
24
- material_desc = " ".join(parts[1:3]) # Adjust indexing if necessary
25
  material_number = parts[3] if "Material" in parts else "220736540000" # Default if not found
26
- hsn_code = "8310" # Fixed as per example; can be extracted if available
27
- igst = "18%" # Fixed as per example; can be extracted if available
28
  unit = parts[4]
29
  quantity = int(parts[5])
30
  dely_qty = int(parts[6])
 
1
  import pdfplumber
2
  import pandas as pd
3
  import gradio as gr
4
+ import re
5
 
6
  # Define function to extract data
7
  def extract_data(pdf_file):
8
  data = []
9
  columns = ["Purchase Order No", "Date", "SI No", "Material Number", "Material Description", "HSN Code", "IGST", "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value"]
10
 
11
+ # Example Purchase Order Details (Adjust accordingly)
12
+ purchase_order_no = "7200018552"
13
+ purchase_order_date = "28.09.2024"
14
 
15
  with pdfplumber.open(pdf_file) as pdf:
16
  for page in pdf.pages:
 
19
  parts = line.split()
20
  try:
21
  si_no = int(parts[0]) # Extract SI No
22
+ if si_no % 10 == 0: # Assuming SI numbers are in multiples of 10
23
+ # Extracting fields based on pattern and order as per the provided format
24
+ material_desc = "BPS 017507" # Based on your example; adjust if dynamic
 
25
  material_number = parts[3] if "Material" in parts else "220736540000" # Default if not found
26
+ hsn_code = "8310" # Fixed HSN Code
27
+ igst = "18%" # Fixed IGST
28
  unit = parts[4]
29
  quantity = int(parts[5])
30
  dely_qty = int(parts[6])