neerajkalyank committed on
Commit
8755e00
1 Parent(s): 0bb856e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -16
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import pdfplumber
2
  import pandas as pd
3
- from io import BytesIO
4
  import re
5
  import gradio as gr
6
 
@@ -8,11 +7,8 @@ def extract_data_from_pdf(pdf_file):
8
  data = []
9
  po_number = None
10
 
11
- # Save BytesIO to temporary file
12
- with open("temp.pdf", "wb") as f:
13
- f.write(pdf_file.getbuffer())
14
-
15
- with pdfplumber.open("temp.pdf") as pdf:
16
  for page in pdf.pages:
17
  text = page.extract_text()
18
 
@@ -57,16 +53,8 @@ def extract_data_from_pdf(pdf_file):
57
 
58
  # Convert data to DataFrame and save to Excel
59
  df = pd.DataFrame(data)
60
- output = BytesIO()
61
- with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
62
- df.to_excel(writer, index=False, sheet_name="Extracted Data")
63
- output.seek(0)
64
-
65
- # Remove temporary PDF file
66
- import os
67
- os.remove("temp.pdf")
68
-
69
- return output
70
 
71
  # Gradio Interface
72
  iface = gr.Interface(
 
1
  import pdfplumber
2
  import pandas as pd
 
3
  import re
4
  import gradio as gr
5
 
 
7
  data = []
8
  po_number = None
9
 
10
+ # Open PDF file directly
11
+ with pdfplumber.open(pdf_file.name) as pdf:
 
 
 
12
  for page in pdf.pages:
13
  text = page.extract_text()
14
 
 
53
 
54
  # Convert data to DataFrame and save to Excel
55
  df = pd.DataFrame(data)
56
+ output = df.to_excel("output.xlsx", index=False)
57
+ return "output.xlsx"
 
 
 
 
 
 
 
 
58
 
59
  # Gradio Interface
60
  iface = gr.Interface(