Spaces:
Sleeping
Sleeping
DSatishchandra
committed on
Commit
•
6bf4435
1
Parent(s):
8a7c854
Update federal_electric.py
Browse files - federal_electric.py +9 -29
federal_electric.py
CHANGED
@@ -89,31 +89,11 @@ def extract_po_data(pdf_file):
|
|
89 |
|
90 |
return df
|
91 |
|
92 |
-
def
|
93 |
"""
|
94 |
-
Processes the uploaded PDF and saves the extracted data
|
95 |
"""
|
96 |
-
df = extract_po_data(pdf_file.name)
|
97 |
-
|
98 |
-
# Save the file in the desired format
|
99 |
-
output_file = f"output.{output_format}"
|
100 |
-
if output_format == "csv":
|
101 |
-
df.to_csv(output_file, index=False)
|
102 |
-
elif output_format == "xlsx":
|
103 |
-
df.to_excel(output_file, index=False, engine="openpyxl")
|
104 |
-
|
105 |
-
return output_file
|
106 |
-
|
107 |
-
# Gradio interface function
|
108 |
-
def gradio_interface(pdf_file, output_format):
|
109 |
-
output_file = process_and_save(pdf_file, output_format)
|
110 |
-
return output_file
|
111 |
-
|
112 |
-
def process_pdf(file):
|
113 |
try:
|
114 |
-
# Extract text from the PDF
|
115 |
-
text = extract_text_from_pdf(file)
|
116 |
-
|
117 |
# Process the extracted text into a DataFrame
|
118 |
df = extract_po_data(file.name)
|
119 |
|
@@ -124,16 +104,16 @@ def process_pdf(file):
|
|
124 |
except Exception as e:
|
125 |
return None, f"Error during processing: {str(e)}"
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
# Gradio app interface
|
131 |
iface = gr.Interface(
|
132 |
-
fn=
|
133 |
-
inputs=[gr.File(label="Upload PDF")
|
134 |
-
outputs=
|
|
|
|
|
|
|
135 |
title="Enhanced PO Data Extractor",
|
136 |
-
description="Extract data from Purchase Orders, including multi-line descriptions, and clean unwanted text or symbols.
|
137 |
)
|
138 |
|
139 |
if __name__ == "__main__":
|
|
|
89 |
|
90 |
return df
|
91 |
|
92 |
+
def process_pdf(file):
|
93 |
"""
|
94 |
+
Processes the uploaded PDF and saves the extracted data.
|
95 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
try:
|
|
|
|
|
|
|
97 |
# Process the extracted text into a DataFrame
|
98 |
df = extract_po_data(file.name)
|
99 |
|
|
|
104 |
except Exception as e:
|
105 |
return None, f"Error during processing: {str(e)}"
|
106 |
|
|
|
|
|
|
|
107 |
# Gradio app interface
|
108 |
iface = gr.Interface(
|
109 |
+
fn=process_pdf,
|
110 |
+
inputs=[gr.File(label="Upload PDF")],
|
111 |
+
outputs=[
|
112 |
+
gr.File(label="Download Extracted Data"),
|
113 |
+
gr.Textbox(label="Status")
|
114 |
+
],
|
115 |
title="Enhanced PO Data Extractor",
|
116 |
+
description="Extract data from Purchase Orders, including multi-line descriptions, and clean unwanted text or symbols."
|
117 |
)
|
118 |
|
119 |
if __name__ == "__main__":
|