# Page header captured together with the file (not part of the program):
# DSatishchandra's picture
# Update app.py
# 615fc49 verified
# raw
# history blame
# 1.82 kB
import pdfplumber
import pandas as pd
import re
import gradio as gr
# Individual processing functions
def process_bhel(pdf_file):
    """Handle a PDF submitted with the "BHEL" format selected.

    No BHEL-specific parsing exists yet, so the work is delegated to
    ``process_common`` with "BHEL Output.xlsx" as the output file name.
    Returns whatever ``process_common`` returns.
    """
    # TODO: place the BHEL-specific code logic here.
    output_name = "BHEL Output.xlsx"
    return process_common(pdf_file, output_name)
def process_federal_electric(pdf_file):
    """Handle a PDF submitted with the "Federal Electric" format selected.

    No Federal Electric-specific parsing exists yet, so the work is delegated
    to ``process_common`` with "Federal Electric Output.xlsx" as the output
    file name. Returns whatever ``process_common`` returns.
    """
    # TODO: place the Federal Electric-specific code logic here.
    output_name = "Federal Electric Output.xlsx"
    return process_common(pdf_file, output_name)
def process_al_nisf(pdf_file):
    """Handle a PDF submitted with the "AL-NISF" format selected.

    No AL-NISF-specific parsing exists yet, so the work is delegated to
    ``process_common`` with "AL-NISF Output.xlsx" as the output file name.
    Returns whatever ``process_common`` returns.
    """
    # TODO: place the AL-NISF-specific code logic here.
    output_name = "AL-NISF Output.xlsx"
    return process_common(pdf_file, output_name)
def process_common(pdf_file, output_name):
    """
    Extract the text of every page of a PDF and save it to an Excel file.

    This is the generalized processing step the format-specific functions
    currently delegate to. The output is a placeholder layout: a single row
    with one "Text" column holding the concatenated page text.

    Args:
        pdf_file: Either a path to the PDF, or an object exposing that path
            as its ``name`` attribute (the form the upload widget is
            presumed to deliver -- confirm against the installed Gradio
            version).
        output_name: Path of the Excel file to write.

    Returns:
        ``output_name``, unchanged, after the file has been written.
    """
    # Accept a plain path as well as a file wrapper carrying the path in .name.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can return None for a page without extractable text
        # on some pdfplumber releases; treat that as an empty string instead
        # of failing on str + None.
        text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Example output only: replace with real field extraction when available.
    df = pd.DataFrame([{"Text": text}])
    df.to_excel(output_name, index=False)
    return output_name
# Dropdown processing function
def main_process(pdf_file, format_choice):
    """Route an uploaded PDF to the processor for the selected format.

    Args:
        pdf_file: The uploaded PDF as handed over by the UI, or ``None`` when
            nothing was uploaded.
        format_choice: The selected format label: "BHEL", "Federal Electric"
            or "AL-NISF".

    Returns:
        The value returned by the matching processor (the output file name),
        or ``None`` when no file was supplied or the format is not one of the
        known labels.
    """
    # Nothing uploaded: return None, as for an unknown format, instead of
    # failing inside the processors on a missing file.
    if pdf_file is None:
        return None

    if format_choice == "BHEL":
        return process_bhel(pdf_file)
    if format_choice == "Federal Electric":
        return process_federal_electric(pdf_file)
    if format_choice == "AL-NISF":
        return process_al_nisf(pdf_file)

    # Unknown or missing format selection.
    return None
# Gradio interface
# Wire the UI: an uploaded PDF and the chosen format are passed to
# main_process, and its return value is offered as a downloadable file.
iface = gr.Interface(
    title="Consolidated PO Data Extractor",
    description="Select the format and upload a PDF to extract and download the data.",
    fn=main_process,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Dropdown(
            label="Select Format",
            choices=["BHEL", "Federal Electric", "AL-NISF"],
        ),
    ],
    outputs=gr.File(label="Download Processed File"),
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()