DSatishchandra committed on
Commit
7cedb75
·
verified ·
1 Parent(s): f4034fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -50
app.py CHANGED
@@ -1,59 +1,50 @@
1
- import pdfplumber
2
- import pandas as pd
3
- import re
4
  import gradio as gr
 
 
 
5
 
6
- # Individual processing functions
7
def process_bhel(pdf_file):
    """Process a BHEL purchase-order PDF via the shared pipeline.

    No BHEL-specific parsing is implemented here; the call is delegated to
    ``process_common`` with the BHEL output workbook name.
    """
    output_name = "BHEL Output.xlsx"
    return process_common(pdf_file, output_name)
10
-
11
def process_federal_electric(pdf_file):
    """Process a Federal Electric purchase-order PDF via the shared pipeline.

    No Federal Electric-specific parsing is implemented here; the call is
    delegated to ``process_common`` with the matching output workbook name.
    """
    output_name = "Federal Electric Output.xlsx"
    return process_common(pdf_file, output_name)
14
-
15
def process_al_nisf(pdf_file):
    """Process an AL-NISF purchase-order PDF via the shared pipeline.

    No AL-NISF-specific parsing is implemented here; the call is delegated to
    ``process_common`` with the AL-NISF output workbook name.
    """
    output_name = "AL-NISF Output.xlsx"
    return process_common(pdf_file, output_name)
18
-
19
def process_common(pdf_file, output_name):
    """
    Extract the text of every page of a PDF and save it to an Excel workbook.

    Args:
        pdf_file: Uploaded file object; only its ``name`` attribute is read
            and passed to ``pdfplumber.open`` as the path of the PDF.
        output_name: Path of the Excel workbook to write.

    Returns:
        ``output_name``, so the caller can hand the written file back to the UI.
    """
    with pdfplumber.open(pdf_file.name) as pdf:
        # extract_text() may return None for a page without a text layer
        # (e.g. a scanned image) in some pdfplumber versions; treat that as
        # empty text instead of failing on `str + None`.
        text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Placeholder output: one row with a single "Text" column holding the
    # concatenated text of the whole document.
    df = pd.DataFrame([{"Text": text}])
    df.to_excel(output_name, index=False)
    return output_name
34
-
35
- # Dropdown processing function
36
def main_process(pdf_file, format_choice):
    """
    Route an uploaded PDF to the processor matching the selected format.

    Args:
        pdf_file: Uploaded file object, or None when nothing was uploaded.
        format_choice: One of "BHEL", "Federal Electric" or "AL-NISF".

    Returns:
        Whatever the selected processor returns, or None when no file was
        uploaded or the format is not recognised.
    """
    # Without an upload there is nothing to process; bail out before the
    # processors try to read attributes of the missing file object.
    if pdf_file is None:
        return None

    if format_choice == "BHEL":
        return process_bhel(pdf_file)
    if format_choice == "Federal Electric":
        return process_federal_electric(pdf_file)
    if format_choice == "AL-NISF":
        return process_al_nisf(pdf_file)
    return None
45
 
46
- # Gradio interface
47
# UI wiring: a PDF upload and a format selector feed main_process, whose
# result is offered back as a downloadable file.
iface = gr.Interface(
    title="Consolidated PO Data Extractor",
    description="Select the format and upload a PDF to extract and download the data.",
    fn=main_process,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Dropdown(
            choices=["BHEL", "Federal Electric", "AL-NISF"],
            label="Select Format",
        ),
    ],
    outputs=gr.File(label="Download Processed File"),
)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
 
 
 
 
 
1
  import gradio as gr
2
+ from ALNISF import process_pdf as process_alnisf
3
+ from federal_electric import process_pdf as process_federal_electric
4
+ from bhel import process_pdf as process_bhel
5
 
6
+ # Wrapper function to handle multiple extractors
7
def process_file(pdf_file, extractor):
    """
    Process the uploaded PDF using the selected extractor.

    Args:
        pdf_file: Uploaded PDF file, or None when nothing was uploaded.
        extractor: Name of the extractor to run: "ALNISF",
            "Federal Electric" or "BHEL".

    Returns:
        The selected extractor's result (presumably a
        ``(output_file, status_message)`` pair to match the two UI outputs;
        confirm against the extractor modules). For an unknown extractor or
        a missing upload, ``(None, <message>)`` is returned instead.
    """
    if extractor not in ("ALNISF", "Federal Electric", "BHEL"):
        return None, "Invalid extractor selected."

    # Nothing was uploaded: report it through the status output rather than
    # forwarding None to an extractor.
    if pdf_file is None:
        return None, "No PDF file uploaded."

    if extractor == "ALNISF":
        return process_alnisf(pdf_file)
    if extractor == "Federal Electric":
        return process_federal_electric(pdf_file)
    return process_bhel(pdf_file)
24
 
25
+ # Gradio interface setup
26
def create_main_interface():
    """
    Build the Gradio interface that fronts all extractors.

    The interface takes a PDF upload and an extractor choice, calls
    ``process_file`` with them, and shows a downloadable file plus a status
    text box.
    """
    pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
    extractor_choice = gr.Radio(
        ["ALNISF", "Federal Electric", "BHEL"],
        label="Select Extractor",
        value="ALNISF",
    )
    extracted_file = gr.File(label="Download Extracted Data")
    status_box = gr.Textbox(label="Status")

    return gr.Interface(
        fn=process_file,
        inputs=[pdf_upload, extractor_choice],
        outputs=[extracted_file, status_box],
        title="Unified PO Data Extraction App",
        description="Upload a Purchase Order PDF and select the extractor (ALNISF, Federal Electric, BHEL) to process the file and download the extracted data.",
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    create_main_interface().launch()