import re

import gradio as gr
import pandas as pd
import pdfplumber


# Individual processing functions
def process_bhel(pdf_file):
    """Process a BHEL purchase-order PDF and return the output Excel path.

    Currently delegates to ``process_common``.
    """
    # TODO: place the BHEL-specific logic here.
    return process_common(pdf_file, "BHEL Output.xlsx")


def process_federal_electric(pdf_file):
    """Process a Federal Electric PDF and return the output Excel path.

    Currently delegates to ``process_common``.
    """
    # TODO: place the Federal Electric-specific logic here.
    return process_common(pdf_file, "Federal Electric Output.xlsx")


def process_al_nisf(pdf_file):
    """Process an AL-NISF PDF and return the output Excel path.

    Currently delegates to ``process_common``.
    """
    # TODO: place the AL-NISF-specific logic here.
    return process_common(pdf_file, "AL-NISF Output.xlsx")


def process_common(pdf_file, output_name):
    """Extract all text from a PDF and write it to an Excel workbook.

    Args:
        pdf_file: Either a path string or an object whose ``.name``
            attribute is the path of the PDF to read.
        output_name: Path of the Excel file to create.

    Returns:
        ``output_name``, after the workbook has been written.
    """
    # NOTE(review): depending on Gradio version/configuration the upload may
    # arrive as a plain path string rather than a tempfile-like object --
    # accept both; confirm against the installed Gradio version.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Replace this with common or specific processing logic.
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None for a page without text in some
        # pdfplumber versions; treat that as an empty string instead of
        # letting the concatenation raise TypeError.
        text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Example: a dummy single-row DataFrame holding the whole text.
    df = pd.DataFrame([{"Text": text}])
    df.to_excel(output_name, index=False)
    return output_name


# Maps each dropdown choice to the function that handles that format.
_FORMAT_PROCESSORS = {
    "BHEL": process_bhel,
    "Federal Electric": process_federal_electric,
    "AL-NISF": process_al_nisf,
}


# Dropdown processing function
def main_process(pdf_file, format_choice):
    """Dispatch the uploaded PDF to the processor for the selected format.

    Args:
        pdf_file: The uploaded PDF as provided by the file input, or
            ``None`` when nothing was uploaded.
        format_choice: One of the keys of ``_FORMAT_PROCESSORS``.

    Returns:
        The path of the generated Excel file, or ``None`` when no file was
        uploaded or the format is not recognised.
    """
    if pdf_file is None:
        # Nothing to process; mirror the unknown-format fallback instead of
        # failing on the missing upload.
        return None

    processor = _FORMAT_PROCESSORS.get(format_choice)
    if processor is None:
        return None
    return processor(pdf_file)


# Gradio interface
iface = gr.Interface(
    fn=main_process,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Dropdown(
            choices=["BHEL", "Federal Electric", "AL-NISF"],
            label="Select Format",
        ),
    ],
    outputs=gr.File(label="Download Processed File"),
    title="Consolidated PO Data Extractor",
    description="Select the format and upload a PDF to extract and download the data.",
)

if __name__ == "__main__":
    iface.launch()