Spaces:
Sleeping
Sleeping
neerajkalyank
committed on
Commit
•
938ff71
1
Parent(s):
a72b612
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ def extract_data_from_pdf(pdf_file):
|
|
15 |
# Extract text from each page
|
16 |
text = page.extract_text()
|
17 |
if text:
|
|
|
18 |
text_data.append(text)
|
19 |
|
20 |
# Initialize list for parsed data
|
@@ -38,13 +39,7 @@ def extract_data_from_pdf(pdf_file):
|
|
38 |
row["discount"] = float(row["discount"])
|
39 |
row["amount"] = float(row["amount"])
|
40 |
|
41 |
-
#
|
42 |
-
central_gst = row["amount"] * 0.09 # Central GST 9%
|
43 |
-
state_gst = row["amount"] * 0.09 # State GST 9%
|
44 |
-
row["Central GST"] = round(central_gst, 2)
|
45 |
-
row["State GST"] = round(state_gst, 2)
|
46 |
-
row["Sub Total"] = round(row["amount"] + central_gst + state_gst - row["discount"], 2)
|
47 |
-
|
48 |
data.append(row)
|
49 |
|
50 |
# Create DataFrame if data was extracted
|
@@ -52,7 +47,7 @@ def extract_data_from_pdf(pdf_file):
|
|
52 |
df = pd.DataFrame(data)
|
53 |
df.columns = [
|
54 |
"Pos", "Item Code", "Description", "Unit", "Delivery Date", "Quantity", "Basic Price",
|
55 |
-
"Discount", "Currency", "Amount"
|
56 |
]
|
57 |
|
58 |
# Save the DataFrame to a temporary Excel file
|
@@ -78,7 +73,7 @@ iface = gr.Interface(
|
|
78 |
description=(
|
79 |
"Upload a PDF file to extract structured purchase order data and download it as an Excel file. "
|
80 |
"The app will parse rows with fields like Position, Item Code, Description, Quantity, Price, etc. "
|
81 |
-
"
|
82 |
),
|
83 |
)
|
84 |
|
|
|
15 |
# Extract text from each page
|
16 |
text = page.extract_text()
|
17 |
if text:
|
18 |
+
print(f"Extracted text from page {page.page_number}:\n{text}\n") # Debugging: Print extracted text
|
19 |
text_data.append(text)
|
20 |
|
21 |
# Initialize list for parsed data
|
|
|
39 |
row["discount"] = float(row["discount"])
|
40 |
row["amount"] = float(row["amount"])
|
41 |
|
42 |
+
# Append extracted row to data
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
data.append(row)
|
44 |
|
45 |
# Create DataFrame if data was extracted
|
|
|
47 |
df = pd.DataFrame(data)
|
48 |
df.columns = [
|
49 |
"Pos", "Item Code", "Description", "Unit", "Delivery Date", "Quantity", "Basic Price",
|
50 |
+
"Discount", "Currency", "Amount"
|
51 |
]
|
52 |
|
53 |
# Save the DataFrame to a temporary Excel file
|
|
|
73 |
description=(
|
74 |
"Upload a PDF file to extract structured purchase order data and download it as an Excel file. "
|
75 |
"The app will parse rows with fields like Position, Item Code, Description, Quantity, Price, etc. "
|
76 |
+
"No additional calculations are performed; it simply extracts the data as it appears."
|
77 |
),
|
78 |
)
|
79 |
|