Spaces:
Sleeping
Sleeping
neerajkalyank
committed on
Commit
•
0bb856e
1
Parent(s):
53debb2
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,11 @@ def extract_data_from_pdf(pdf_file):
|
|
8 |
data = []
|
9 |
po_number = None
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
12 |
for page in pdf.pages:
|
13 |
text = page.extract_text()
|
14 |
|
@@ -58,6 +62,10 @@ def extract_data_from_pdf(pdf_file):
|
|
58 |
df.to_excel(writer, index=False, sheet_name="Extracted Data")
|
59 |
output.seek(0)
|
60 |
|
|
|
|
|
|
|
|
|
61 |
return output
|
62 |
|
63 |
# Gradio Interface
|
|
|
8 |
data = []
|
9 |
po_number = None
|
10 |
|
11 |
+
# Save BytesIO to temporary file
|
12 |
+
with open("temp.pdf", "wb") as f:
|
13 |
+
f.write(pdf_file.getbuffer())
|
14 |
+
|
15 |
+
with pdfplumber.open("temp.pdf") as pdf:
|
16 |
for page in pdf.pages:
|
17 |
text = page.extract_text()
|
18 |
|
|
|
62 |
df.to_excel(writer, index=False, sheet_name="Extracted Data")
|
63 |
output.seek(0)
|
64 |
|
65 |
+
# Remove temporary PDF file
|
66 |
+
import os
|
67 |
+
os.remove("temp.pdf")
|
68 |
+
|
69 |
return output
|
70 |
|
71 |
# Gradio Interface
|