DSatishchandra committed on
Commit
6bf4435
1 Parent(s): 8a7c854

Update federal_electric.py

Browse files
Files changed (1) hide show
  1. federal_electric.py +9 -29
federal_electric.py CHANGED
@@ -89,31 +89,11 @@ def extract_po_data(pdf_file):
89
 
90
  return df
91
 
92
- def process_and_save(pdf_file, output_format):
93
  """
94
- Processes the uploaded PDF and saves the extracted data as an Excel or CSV file.
95
  """
96
- df = extract_po_data(pdf_file.name)
97
-
98
- # Save the file in the desired format
99
- output_file = f"output.{output_format}"
100
- if output_format == "csv":
101
- df.to_csv(output_file, index=False)
102
- elif output_format == "xlsx":
103
- df.to_excel(output_file, index=False, engine="openpyxl")
104
-
105
- return output_file
106
-
107
- # Gradio interface function
108
- def gradio_interface(pdf_file, output_format):
109
- output_file = process_and_save(pdf_file, output_format)
110
- return output_file
111
-
112
- def process_pdf(file):
113
  try:
114
- # Extract text from the PDF
115
- text = extract_text_from_pdf(file)
116
-
117
  # Process the extracted text into a DataFrame
118
  df = extract_po_data(file.name)
119
 
@@ -124,16 +104,16 @@ def process_pdf(file):
124
  except Exception as e:
125
  return None, f"Error during processing: {str(e)}"
126
 
127
-
128
-
129
-
130
  # Gradio app interface
131
  iface = gr.Interface(
132
- fn=gradio_interface,
133
- inputs=[gr.File(label="Upload PDF"), gr.Radio(["csv", "xlsx"], label="Output Format")],
134
- outputs=gr.File(label="Download Output"),
 
 
 
135
  title="Enhanced PO Data Extractor",
136
- description="Extract data from Purchase Orders, including multi-line descriptions, and clean unwanted text or symbols. Download as CSV or Excel."
137
  )
138
 
139
  if __name__ == "__main__":
 
89
 
90
  return df
91
 
92
+ def process_pdf(file):
93
  """
94
+ Processes the uploaded PDF and saves the extracted data.
95
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  try:
 
 
 
97
  # Process the extracted text into a DataFrame
98
  df = extract_po_data(file.name)
99
 
 
104
  except Exception as e:
105
  return None, f"Error during processing: {str(e)}"
106
 
 
 
 
107
  # Gradio app interface
108
  iface = gr.Interface(
109
+ fn=process_pdf,
110
+ inputs=[gr.File(label="Upload PDF")],
111
+ outputs=[
112
+ gr.File(label="Download Extracted Data"),
113
+ gr.Textbox(label="Status")
114
+ ],
115
  title="Enhanced PO Data Extractor",
116
+ description="Extract data from Purchase Orders, including multi-line descriptions, and clean unwanted text or symbols."
117
  )
118
 
119
  if __name__ == "__main__":