# Spaces: Sleeping / Sleeping  (page-header text captured along with the file; not program code)
import os
import re

import gradio as gr
import pandas as pd
import pdfplumber
# Individual processing functions
def process_bhel(pdf_file):
    """Handle a PDF uploaded under the "BHEL" format.

    There is no BHEL-specific logic yet: the upload is forwarded unchanged
    to ``process_common`` with ``BHEL Output.xlsx`` as the output name, and
    that call's result is returned.
    """
    # TODO: place the BHEL-specific extraction logic here.
    output_name = "BHEL Output.xlsx"
    return process_common(pdf_file, output_name)
def process_federal_electric(pdf_file):
    """Handle a PDF uploaded under the "Federal Electric" format.

    There is no Federal Electric-specific logic yet: the upload is forwarded
    unchanged to ``process_common`` with ``Federal Electric Output.xlsx`` as
    the output name, and that call's result is returned.
    """
    # TODO: place the Federal Electric-specific extraction logic here.
    output_name = "Federal Electric Output.xlsx"
    return process_common(pdf_file, output_name)
def process_al_nisf(pdf_file):
    """Handle a PDF uploaded under the "AL-NISF" format.

    There is no AL-NISF-specific logic yet: the upload is forwarded unchanged
    to ``process_common`` with ``AL-NISF Output.xlsx`` as the output name, and
    that call's result is returned.
    """
    # TODO: place the AL-NISF-specific extraction logic here.
    output_name = "AL-NISF Output.xlsx"
    return process_common(pdf_file, output_name)
def process_common(pdf_file, output_name):
    """Extract all text from a PDF and save it as a one-row Excel workbook.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path through a
            ``name`` attribute (e.g. a temporary-file wrapper).
        output_name: Path of the Excel file to write.

    Returns:
        ``output_name``, after the workbook has been written.
    """
    # NOTE: Gradio's File component can hand the callback either a plain
    # path or a temp-file object depending on its version/configuration,
    # so both are accepted. Paths are checked first because pathlib.Path
    # also has a ``name`` attribute, but it is only the basename.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can return None for a page without extractable
        # text (seen with older pdfplumber releases); treat that as empty
        # so one blank/scanned page does not abort the whole document.
        text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Placeholder output: a single "Text" column holding the whole document.
    df = pd.DataFrame([{"Text": text}])
    df.to_excel(output_name, index=False)
    return output_name
# Dropdown processing function
def main_process(pdf_file, format_choice):
    """Route an uploaded PDF to the processor for the selected format.

    Args:
        pdf_file: The uploaded PDF as delivered by the upload widget, or
            ``None`` when nothing has been uploaded.
        format_choice: The selected format; ``"BHEL"``,
            ``"Federal Electric"`` and ``"AL-NISF"`` are recognised.

    Returns:
        The result of the matching ``process_*`` function, or ``None`` when
        there is no upload or the format is not recognised.
    """
    # Without an upload there is nothing to process; answer the same way as
    # for an unknown format instead of failing inside the processor.
    if pdf_file is None:
        return None

    if format_choice == "BHEL":
        return process_bhel(pdf_file)
    if format_choice == "Federal Electric":
        return process_federal_electric(pdf_file)
    if format_choice == "AL-NISF":
        return process_al_nisf(pdf_file)
    return None
# Gradio interface
iface = gr.Interface(
    title="Consolidated PO Data Extractor",
    description="Select the format and upload a PDF to extract and download the data.",
    fn=main_process,
    # Input order matches main_process's parameters: the PDF, then the format.
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF"),
        gr.Dropdown(
            label="Select Format",
            choices=["BHEL", "Federal Electric", "AL-NISF"],
        ),
    ],
    # main_process's return value feeds this download component.
    outputs=gr.File(label="Download Processed File"),
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()