Spaces:
Sleeping
Sleeping
File size: 1,818 Bytes
615fc49 2928d72 615fc49 37746c7 615fc49 37746c7 615fc49 37746c7 615fc49 2928d72 615fc49 2928d72 615fc49 2928d72 615fc49 2928d72 37746c7 2928d72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import pdfplumber
import pandas as pd
import re
import gradio as gr
# Individual processing functions
def process_bhel(pdf_file):
    """Handle a PDF selected as the "BHEL" format.

    Delegates to ``process_common`` with the BHEL output filename and
    returns whatever that call returns.
    """
    # TODO: BHEL-specific logic belongs here; only the shared routine runs now.
    output_name = "BHEL Output.xlsx"
    return process_common(pdf_file, output_name)
def process_federal_electric(pdf_file):
    """Handle a PDF selected as the "Federal Electric" format.

    Delegates to ``process_common`` with the Federal Electric output
    filename and returns whatever that call returns.
    """
    # TODO: Federal Electric-specific logic belongs here; only the shared
    # routine runs now.
    output_name = "Federal Electric Output.xlsx"
    return process_common(pdf_file, output_name)
def process_al_nisf(pdf_file):
    """Handle a PDF selected as the "AL-NISF" format.

    Delegates to ``process_common`` with the AL-NISF output filename and
    returns whatever that call returns.
    """
    # TODO: AL-NISF-specific logic belongs here; only the shared routine
    # runs now.
    output_name = "AL-NISF Output.xlsx"
    return process_common(pdf_file, output_name)
def process_common(pdf_file, output_name):
    """
    Generalized function for processing PDFs.

    Extracts the text of every page, concatenates it (no separator between
    pages) and writes it to an Excel workbook containing a single row with
    one ``"Text"`` column.

    Args:
        pdf_file: The uploaded PDF, either as a filesystem path string or as
            an object exposing the path through a ``name`` attribute (e.g. a
            temporary-file wrapper).
        output_name: Path of the Excel file to write.

    Returns:
        ``output_name``, after the workbook has been written.

    Raises:
        ValueError: If ``pdf_file`` is ``None`` (no file was supplied).
    """
    if pdf_file is None:
        # Fail with a clear message instead of an AttributeError on ``.name``.
        raise ValueError("No PDF file was provided.")

    # The upload may arrive as a plain path string or as a file wrapper
    # carrying the path in ``.name``; support both.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Replace this with common or specific processing logic
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can yield None for a page without extractable text
        # (seen on some pdfplumber releases); treat that as an empty string.
        text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Example: Create dummy DataFrame
    df = pd.DataFrame([{"Text": text}])
    df.to_excel(output_name, index=False)
    return output_name
# Dropdown processing function
def main_process(pdf_file, format_choice):
    """Route an uploaded PDF to the processor matching ``format_choice``.

    Args:
        pdf_file: The uploaded file, passed through unchanged to the
            selected processor.
        format_choice: One of ``"BHEL"``, ``"Federal Electric"`` or
            ``"AL-NISF"`` (matched exactly, case-sensitive).

    Returns:
        The selected processor's return value, or ``None`` when
        ``format_choice`` matches none of the known formats.
    """
    if format_choice == "BHEL":
        return process_bhel(pdf_file)

    if format_choice == "Federal Electric":
        return process_federal_electric(pdf_file)

    if format_choice == "AL-NISF":
        return process_al_nisf(pdf_file)

    # Unknown or missing selection: nothing to produce.
    return None
# Gradio interface
# Components are created in the same order main_process receives their
# values: the uploaded PDF first, then the selected format.
_pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
_format_selector = gr.Dropdown(
    choices=["BHEL", "Federal Electric", "AL-NISF"],
    label="Select Format",
)
_result_download = gr.File(label="Download Processed File")

iface = gr.Interface(
    fn=main_process,
    inputs=[_pdf_upload, _format_selector],
    outputs=_result_download,
    title="Consolidated PO Data Extractor",
    description="Select the format and upload a PDF to extract and download the data.",
)

if __name__ == "__main__":
    # Launch the UI only when executed as a script, not when imported.
    iface.launch()
|