DSatishchandra committed on
Commit
e94122a
·
verified ·
1 Parent(s): 1ee293f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -23
app.py CHANGED
@@ -4,6 +4,8 @@ import gradio as gr
4
  import fitz # PyMuPDF
5
  import pandas as pd
6
  from io import BytesIO
 
 
7
 
8
  def parse_federal_transformers(file_text: str) -> Dict[str, Union[str, List[Dict[str, Union[str, int, float]]]]]:
9
  parsed_data = {
@@ -50,35 +52,35 @@ def read_pdf(file_path):
50
  return text
51
 
52
  def process_pdf(file):
53
- """Handles PDF file input, extracts text, parses it, and returns an Excel file."""
54
  file_text = read_pdf(file.name) # Read text from PDF
55
  parsed_data = parse_federal_transformers(file_text) # Parse extracted text
56
 
57
  # Convert parsed data to DataFrame and create an Excel file
58
- output = BytesIO()
59
- with pd.ExcelWriter(output, engine='openpyxl') as writer:
60
- # Write main PO details to the first sheet
61
- main_info = {
62
- "Field": ["Purchase Order No", "Date", "Supplier", "Invoice Address", "Delivery Address", "Currency", "Payment Terms"],
63
- "Value": [
64
- parsed_data["Purchase Order No"],
65
- parsed_data["Date"],
66
- parsed_data["Supplier"],
67
- parsed_data["Invoice Address"],
68
- parsed_data["Delivery Address"],
69
- parsed_data["Currency"],
70
- parsed_data["Payment Terms"]
71
- ]
72
- }
73
- main_df = pd.DataFrame(main_info)
74
- main_df.to_excel(writer, index=False, sheet_name="Purchase Order Details")
75
 
76
- # Write item details to a second sheet
77
- items_df = pd.DataFrame(parsed_data["Items"])
78
- items_df.to_excel(writer, index=False, sheet_name="Items")
79
 
80
- output.seek(0)
81
- return output
82
 
83
  # Create Gradio interface
84
  iface = gr.Interface(
 
4
  import fitz # PyMuPDF
5
  import pandas as pd
6
  from io import BytesIO
7
+ import tempfile
8
+ import os
9
 
10
  def parse_federal_transformers(file_text: str) -> Dict[str, Union[str, List[Dict[str, Union[str, int, float]]]]]:
11
  parsed_data = {
 
52
  return text
53
 
54
  def process_pdf(file):
55
+ """Handles PDF file input, extracts text, parses it, and returns an Excel file path."""
56
  file_text = read_pdf(file.name) # Read text from PDF
57
  parsed_data = parse_federal_transformers(file_text) # Parse extracted text
58
 
59
  # Convert parsed data to DataFrame and create an Excel file
60
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
61
+ with pd.ExcelWriter(temp_file.name, engine='openpyxl') as writer:
62
+ # Write main PO details to the first sheet
63
+ main_info = {
64
+ "Field": ["Purchase Order No", "Date", "Supplier", "Invoice Address", "Delivery Address", "Currency", "Payment Terms"],
65
+ "Value": [
66
+ parsed_data["Purchase Order No"],
67
+ parsed_data["Date"],
68
+ parsed_data["Supplier"],
69
+ parsed_data["Invoice Address"],
70
+ parsed_data["Delivery Address"],
71
+ parsed_data["Currency"],
72
+ parsed_data["Payment Terms"]
73
+ ]
74
+ }
75
+ main_df = pd.DataFrame(main_info)
76
+ main_df.to_excel(writer, index=False, sheet_name="Purchase Order Details")
77
 
78
+ # Write item details to a second sheet
79
+ items_df = pd.DataFrame(parsed_data["Items"])
80
+ items_df.to_excel(writer, index=False, sheet_name="Items")
81
 
82
+ # Return the temporary file path for download
83
+ return temp_file.name
84
 
85
  # Create Gradio interface
86
  iface = gr.Interface(