Spaces:
Runtime error
Runtime error
DSatishchandra
committed on
Commit
•
4482309
1
Parent(s):
81fd83e
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
import pdfplumber
|
2 |
import pandas as pd
|
3 |
import gradio as gr
|
|
|
4 |
|
5 |
# Define function to extract data
|
6 |
def extract_data(pdf_file):
|
7 |
data = []
|
8 |
columns = ["Purchase Order No", "Date", "SI No", "Material Number", "Material Description", "HSN Code", "IGST", "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value"]
|
9 |
|
10 |
-
# Example Purchase Order Details (Adjust accordingly
|
11 |
-
purchase_order_no = "
|
12 |
-
purchase_order_date = "
|
13 |
|
14 |
with pdfplumber.open(pdf_file) as pdf:
|
15 |
for page in pdf.pages:
|
@@ -18,13 +19,12 @@ def extract_data(pdf_file):
|
|
18 |
parts = line.split()
|
19 |
try:
|
20 |
si_no = int(parts[0]) # Extract SI No
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
material_desc = " ".join(parts[1:3]) # Adjust indexing if necessary
|
25 |
material_number = parts[3] if "Material" in parts else "220736540000" # Default if not found
|
26 |
-
hsn_code = "8310" # Fixed
|
27 |
-
igst = "18%" # Fixed
|
28 |
unit = parts[4]
|
29 |
quantity = int(parts[5])
|
30 |
dely_qty = int(parts[6])
|
|
|
1 |
import pdfplumber
|
2 |
import pandas as pd
|
3 |
import gradio as gr
|
4 |
+
import re
|
5 |
|
6 |
# Define function to extract data
|
7 |
def extract_data(pdf_file):
|
8 |
data = []
|
9 |
columns = ["Purchase Order No", "Date", "SI No", "Material Number", "Material Description", "HSN Code", "IGST", "Unit", "Quantity", "Dely Qty", "Dely Date", "Unit Rate", "Value"]
|
10 |
|
11 |
+
# Example Purchase Order Details (Adjust accordingly)
|
12 |
+
purchase_order_no = "7200018552"
|
13 |
+
purchase_order_date = "28.09.2024"
|
14 |
|
15 |
with pdfplumber.open(pdf_file) as pdf:
|
16 |
for page in pdf.pages:
|
|
|
19 |
parts = line.split()
|
20 |
try:
|
21 |
si_no = int(parts[0]) # Extract SI No
|
22 |
+
if si_no % 10 == 0: # Assuming SI numbers are in multiples of 10
|
23 |
+
# Extracting fields based on pattern and order as per the provided format
|
24 |
+
material_desc = "BPS 017507" # Based on your example; adjust if dynamic
|
|
|
25 |
material_number = parts[3] if "Material" in parts else "220736540000" # Default if not found
|
26 |
+
hsn_code = "8310" # Fixed HSN Code
|
27 |
+
igst = "18%" # Fixed IGST
|
28 |
unit = parts[4]
|
29 |
quantity = int(parts[5])
|
30 |
dely_qty = int(parts[6])
|