Spaces:
Sleeping
Sleeping
DSatishchandra
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,59 +1,50 @@
|
|
1 |
-
import pdfplumber
|
2 |
-
import pandas as pd
|
3 |
-
import re
|
4 |
import gradio as gr
|
|
|
|
|
|
|
5 |
|
6 |
-
#
|
7 |
-
def
|
8 |
-
# Place the BHEL-specific code logic here
|
9 |
-
return process_common(pdf_file, "BHEL Output.xlsx")
|
10 |
-
|
11 |
-
def process_federal_electric(pdf_file):
|
12 |
-
# Place the Federal Electric-specific code logic here
|
13 |
-
return process_common(pdf_file, "Federal Electric Output.xlsx")
|
14 |
-
|
15 |
-
def process_al_nisf(pdf_file):
|
16 |
-
# Place the AL-NISF-specific code logic here
|
17 |
-
return process_common(pdf_file, "AL-NISF Output.xlsx")
|
18 |
-
|
19 |
-
def process_common(pdf_file, output_name):
|
20 |
"""
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
22 |
"""
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
for page in pdf.pages:
|
27 |
-
text += page.extract_text()
|
28 |
-
|
29 |
-
# Example: Create dummy DataFrame
|
30 |
-
data = {"Text": text}
|
31 |
-
df = pd.DataFrame([data])
|
32 |
-
df.to_excel(output_name, index=False)
|
33 |
-
return output_name
|
34 |
-
|
35 |
-
# Dropdown processing function
|
36 |
-
def main_process(pdf_file, format_choice):
|
37 |
-
if format_choice == "BHEL":
|
38 |
-
return process_bhel(pdf_file)
|
39 |
-
elif format_choice == "Federal Electric":
|
40 |
return process_federal_electric(pdf_file)
|
41 |
-
elif
|
42 |
-
return
|
43 |
else:
|
44 |
-
return None
|
45 |
|
46 |
-
# Gradio interface
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
if __name__ == "__main__":
|
59 |
-
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from ALNISF import process_pdf as process_alnisf
|
3 |
+
from federal_electric import process_pdf as process_federal_electric
|
4 |
+
from bhel import process_pdf as process_bhel
|
5 |
|
6 |
+
# Wrapper function to handle multiple extractors
|
7 |
+
def process_file(pdf_file, extractor):
    """
    Processes the uploaded PDF using the selected extractor.

    Args:
        pdf_file: Uploaded PDF file, or None when nothing was uploaded.
        extractor: The selected extractor script (ALNISF, Federal Electric, BHEL).

    Returns:
        Whatever the selected extractor returns (presumably the extracted
        file and a status message -- confirm against the extractor modules),
        or ``(None, message)`` when there is no upload or the extractor
        name is not recognised.
    """
    # Without an upload there is nothing to hand to an extractor; report it
    # through the status output in the same shape as the invalid-extractor
    # branch instead of letting the extractor fail on None.
    if pdf_file is None:
        return None, "No PDF file uploaded."

    if extractor == "ALNISF":
        return process_alnisf(pdf_file)
    if extractor == "Federal Electric":
        return process_federal_electric(pdf_file)
    if extractor == "BHEL":
        return process_bhel(pdf_file)
    return None, "Invalid extractor selected."
|
24 |
|
25 |
+
# Gradio interface setup
|
26 |
+
def create_main_interface():
    """
    Builds the single Gradio interface that fronts all extractors.

    Returns:
        A ``gr.Interface`` wired to ``process_file`` with a PDF upload and
        an extractor selector as inputs, and a downloadable file plus a
        status textbox as outputs.
    """
    # Inputs: the PDF to process and which extractor to run on it.
    pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
    extractor_choice = gr.Radio(
        ["ALNISF", "Federal Electric", "BHEL"],
        label="Select Extractor",
        value="ALNISF",
    )

    # Outputs: the extracted data file and a status message.
    extracted_download = gr.File(label="Download Extracted Data")
    status_box = gr.Textbox(label="Status")

    return gr.Interface(
        fn=process_file,
        inputs=[pdf_upload, extractor_choice],
        outputs=[extracted_download, status_box],
        title="Unified PO Data Extraction App",
        description="Upload a Purchase Order PDF and select the extractor (ALNISF, Federal Electric, BHEL) to process the file and download the extracted data.",
    )
|
47 |
|
48 |
if __name__ == "__main__":
    # Only start the web UI when run as a script, not when imported.
    interface = create_main_interface()
    interface.launch()
|