Spaces:
Sleeping
Sleeping
neerajkalyank
committed on
Commit
•
48f229e
1
Parent(s):
ac741f6
Update app.py
Browse files
app.py
CHANGED
@@ -1,14 +1,21 @@
|
|
1 |
import pdfplumber
|
2 |
import pandas as pd
|
3 |
from io import BytesIO
|
|
|
4 |
import re
|
5 |
import gradio as gr
|
6 |
|
7 |
def extract_data_from_pdf(pdf_file):
|
8 |
data = []
|
9 |
po_number = None
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
|
|
12 |
for page in pdf.pages:
|
13 |
text = page.extract_text()
|
14 |
|
|
|
1 |
import pdfplumber
|
2 |
import pandas as pd
|
3 |
from io import BytesIO
|
4 |
+
import tempfile
|
5 |
import re
|
6 |
import gradio as gr
|
7 |
|
8 |
def extract_data_from_pdf(pdf_file):
|
9 |
data = []
|
10 |
po_number = None
|
11 |
+
|
12 |
+
# Save the uploaded file temporarily so pdfplumber can open it
|
13 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
|
14 |
+
temp_pdf.write(pdf_file.read()) # Write the uploaded file content to a temporary file
|
15 |
+
temp_pdf_path = temp_pdf.name # Get the file path
|
16 |
|
17 |
+
# Now open the temporary file with pdfplumber
|
18 |
+
with pdfplumber.open(temp_pdf_path) as pdf:
|
19 |
for page in pdf.pages:
|
20 |
text = page.extract_text()
|
21 |
|