# Immobilier / app.py
# kheopss's picture
# Update app.py
# 868ba24 verified
# raw
# history blame
# 8.33 kB
import os # Included in Python's standard library
from openai import OpenAI # Official OpenAI Python package
from IPython.display import Audio # Third-party IPython package (not part of the standard library)
## supporting functions
import base64, textwrap, time, io
from PIL import Image # Pillow image library
import tempfile
from pdf2image import convert_from_path # Convert PDFs to images
import gradio as gr # For building a UI
from gradio_pdf import PDF # Gradio PDF component
from dotenv import load_dotenv # To load environment variables from .env file
# Load environment variables from a local .env file (if one exists) so the
# API credentials below can be read through os.getenv.
load_dotenv()
# Read the key from OPENAI_API_KEY; the previous lookup used the misspelled
# name 'OPENAI_API_KE'.
openai_api_key = os.getenv('OPENAI_API_KEY')
# Shared OpenAI client used by process() for chat completion requests.
client = OpenAI(api_key=openai_api_key)
# Resize the image if it exceeds the max dimension
def resize_image(image, max_dimension):
    """Return *image* scaled down so that neither side exceeds *max_dimension*.

    Palette ("P") images are converted first: to RGBA when their ``info``
    carries a "transparency" entry, to RGB otherwise. Images that already
    fit are returned without resizing. The aspect ratio is preserved and the
    longer side becomes exactly ``max_dimension``.

    Args:
        image: Image object exposing ``size``, ``mode``, ``info``,
            ``convert`` and ``resize`` (a PIL image in this file).
        max_dimension: Largest allowed width/height in pixels.

    Returns:
        The (possibly converted and/or resized) image.
    """
    width, height = image.size

    # Convert palette images so the resize works on real colour channels.
    if image.mode == "P":
        target_mode = "RGBA" if "transparency" in image.info else "RGB"
        image = image.convert(target_mode)

    # Nothing to do when the image already fits.
    if width <= max_dimension and height <= max_dimension:
        return image

    if width > height:
        new_width = max_dimension
        new_height = int(height * (max_dimension / width))
    else:
        new_height = max_dimension
        new_width = int(width * (max_dimension / height))

    # int() truncation can drive the short side of a very elongated image
    # down to 0, which is not a valid target size; keep at least one pixel.
    new_size = (max(1, new_width), max(1, new_height))
    return image.resize(new_size, Image.LANCZOS)
# Serialise an image as PNG and hand back the raw bytes
def convert_to_png(image):
    """Encode *image* as PNG in memory and return the resulting bytes.

    Args:
        image: Object whose ``save(stream, format=...)`` method writes the
            encoded image into a binary stream.

    Returns:
        The bytes written to the in-memory stream.
    """
    buffer = io.BytesIO()
    try:
        image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    finally:
        # Release the in-memory buffer whether or not saving succeeded.
        buffer.close()
    return png_bytes
# Load an image file and return it base64-encoded as PNG
def process_image(path, max_size):
    """Return ``(base64_png, longest_side)`` for the image stored at *path*.

    A PNG file whose width and height are both within *max_size* is encoded
    straight from disk. Any other image goes through ``resize_image`` and
    ``convert_to_png`` before being encoded.

    Args:
        path: Location of the image file.
        max_size: Largest allowed width/height in pixels.

    Returns:
        A tuple of the base64 text (UTF-8 decoded) and the longest side, in
        pixels, of the image that was encoded.
    """
    with Image.open(path) as source:
        width, height = source.size
        is_png = Image.MIME.get(source.format) == "image/png"
        within_limit = width <= max_size and height <= max_size

        if not (is_png and within_limit):
            # Needs re-encoding and possibly shrinking.
            scaled = resize_image(source, max_size)
            payload = base64.b64encode(convert_to_png(scaled)).decode('utf-8')
            return (payload, max(scaled.size))

        # Already a small enough PNG: send the file bytes unchanged.
        with open(path, "rb") as raw_file:
            payload = base64.b64encode(raw_file.read()).decode('utf-8')
        return (payload, max(width, height))
# Create the image content metadata
def create_image_content(image, maxdim, detail_threshold):
    """Build an ``image_url`` content part for a base64-encoded PNG.

    Args:
        image: Base64 text of the PNG image data.
        maxdim: Longest side of the image in pixels.
        detail_threshold: Images whose longest side is below this value are
            sent with ``"low"`` detail, all others with ``"high"``.

    Returns:
        A dict with ``"type": "image_url"`` and an ``"image_url"`` entry that
        holds the data URL and the chosen detail level.
    """
    detail = "low" if maxdim < detail_threshold else "high"
    return {
        "type": "image_url",
        # process_image always yields PNG bytes, so the data URL must
        # declare image/png rather than image/jpeg.
        "image_url": {"url": f"data:image/png;base64,{image}", "detail": detail},
    }
# Build the system part of the conversation
def set_system_message(sysmsg):
    """Wrap *sysmsg* in a one-element list holding a system-role message.

    Args:
        sysmsg: Text used as the message content.

    Returns:
        ``[{"role": "system", "content": sysmsg}]``
    """
    system_entry = dict(role="system", content=sysmsg)
    return [system_entry]
# Set user message along with attached images
def set_user_message(user_msg_str, file_path_list=None, max_size_px=1024, file_names_list=None, tiled=False, detail_threshold=700):
    """Build the user-role message, optionally with attached images.

    Args:
        user_msg_str: Text of the user message.
        file_path_list: List of image file paths to attach. Anything that is
            not a list (including the default ``None``) means "no images".
        max_size_px: Largest allowed image side, forwarded to
            ``process_image``.
        file_names_list: Optional display names for the images. Used only
            when its length matches ``file_path_list``; otherwise the base
            names of the paths are used.
        tiled: When true (and there are images), emit typed content parts
            built with ``create_image_content``; otherwise emit the text
            followed by ``{"image": ...}`` entries.
        detail_threshold: Forwarded to ``create_image_content``.

    Returns:
        A one-element list holding the user-role message dict.
    """
    # A None default replaces the former shared mutable ``[]`` default.
    if not isinstance(file_path_list, list):
        file_path_list = []
    if not file_path_list:  # No files means no tiling
        tiled = False

    # Handle file names if provided
    if file_names_list and len(file_names_list) == len(file_path_list):
        file_names = file_names_list
    else:
        file_names = [os.path.basename(path) for path in file_path_list]

    # Each entry is a (base64 text, longest side in px) tuple.
    base64_images = [process_image(path, max_size_px) for path in file_path_list]

    uploaded_images_text = ""
    if file_names:
        uploaded_images_text = "\n\n---\n\nUploaded images:\n" + '\n'.join(file_names)
    text = user_msg_str + uploaded_images_text

    if tiled:
        content = [{"type": "text", "text": text}]
        content.extend(
            create_image_content(image, maxdim, detail_threshold)
            for image, maxdim in base64_images
        )
    else:
        # NOTE(review): this shape (bare string plus {"image": ...} entries)
        # differs from the typed parts used in the tiled branch - confirm
        # that the target API still accepts it.
        content = [text] + [{"image": image} for image, _ in base64_images]

    return [{"role": "user", "content": content}]
# Directory holding the Poppler binaries (for PDF to image conversion)
poppler_path = '/usr/bin' # Adjust this path if needed
# Append that directory to the end of the process-wide PATH
os.environ['PATH'] = os.pathsep.join((os.environ['PATH'], poppler_path))
# Render every page of a PDF into its own image file
def pdf_to_images(pdf_path, dpi=300, output_format='JPEG'):
    """Convert the PDF at *pdf_path* into one image file per page.

    The files are written into a freshly created temporary directory and
    named ``page<index>.<format in lower case>``. The directory is not
    removed by this function.

    Args:
        pdf_path: Location of the PDF file.
        dpi: Value forwarded to ``convert_from_path``.
        output_format: Format name passed to each page's ``save`` call and
            used (lower-cased) as the file extension.

    Returns:
        The list of written image paths, in page order.
    """
    out_dir = tempfile.mkdtemp()
    image_paths = []
    for index, rendered in enumerate(convert_from_path(pdf_path, dpi)):
        target = os.path.join(out_dir, f'page{index}.{output_format.lower()}')
        rendered.save(target, output_format)
        image_paths.append(target)
    return image_paths
# System prompt passed to set_system_message() by process(). It lists the
# fields to extract from a French syndic "appel de fonds", the validation
# rule linking the amounts, and the expected JSON output. The text is sent
# to the model as-is (after .strip()), so any edit changes model behaviour.
system_msg = """
You are an intelligent assistant tasked with extracting and validating information from French real estate syndic documents (*appel de fonds*). These documents contain financial details, property information, and owner details. Your job is to extract and ensure the correctness of the following information:
### Task Overview:
You need to extract and validate the following fields:
1. **Total à payer**: The total amount the owner must pay for the period.
2. **Fond travaux alur**: The amount allocated to the ALUR works fund.
3. **Total Part charges prévisionnelles**: The forecasted portion of charges the owner must pay for general building maintenance, collective services, etc.
4. **Part autres travaux**: Any additional expenses related to specific works or repairs.
5. **le solde précédent**: The previous balance from past transactions (can be positive or negative).
6. **Propriétaire**: The name of the property owner.
7. **Adresse du propriétaire**: The postal address of the owner.
8. **Adresse du bien**: The location of the property (address of the unit or building).
9. **Référence**: The reference number of the document or account related to the property.
10. **Date du document**: The date when the document was issued.
11. **Date limite du paiement**: The deadline by which the payment must be made.
12. **Montant total solde en notre faveur**: The total balance in favor of the syndic (if applicable).
### Validation Rule:
The following validation rules must be respected:
- **Total à payer** = **Fond travaux alur** + **Total Part charges prévisionnelles** + **Part autres travaux**.
- The amounts should be taken from the "débit" column, not the "crédit" column, to ensure accuracy. Verify that the **Total à payer** is from the correct column (débit).
- Additionally, both **Total à payer** and **Montant total solde en notre faveur** should be extracted for a cross-check to ensure that the final amounts are accurate and reflect the correct financial state.
### Format for Output:
Return the extracted information in JSON format. If there is a discrepancy (such as a mismatch between amounts or amounts found in the wrong column), return an error message in JSON format explaining the issue.
""".strip()
# Remove the temporary page images once they have been encoded
def _remove_page_images(image_paths):
    """Best-effort deletion of rendered page files and their empty folders."""
    for image_path in image_paths:
        try:
            os.remove(image_path)
        except OSError:
            pass  # Already gone or not removable; nothing else to do.
    for directory in {os.path.dirname(image_path) for image_path in image_paths}:
        try:
            # rmdir only succeeds on empty directories, so unrelated files
            # are never touched.
            os.rmdir(directory)
        except OSError:
            pass


# Define process function to convert PDF and extract information
def process(pdf):
    """Extract the syndic fields from an uploaded PDF via the chat model.

    The PDF is rendered to page images, the images are attached to a user
    message, and the model's reply text is returned.

    Args:
        pdf: Path of the uploaded PDF file, as supplied by the PDF input
            component. A falsy value (nothing uploaded) short-circuits.

    Returns:
        The text content of the first completion choice, or ``None`` when no
        PDF was given or the API call failed (the error is printed).
    """
    if not pdf:
        print("No PDF provided.")
        return None

    image_paths = pdf_to_images(pdf)
    try:
        system = set_system_message(system_msg)
        user = set_user_message("", file_path_list=image_paths, max_size_px=1024)
    finally:
        # The images are base64-encoded in memory by now (or encoding
        # failed), so the rendered files on disk are no longer needed.
        _remove_page_images(image_paths)

    params = {
        "model": "gpt-4o",
        "temperature": 0.01,
        "max_tokens": 500,
        "stream": False,
        "messages": system + user,
    }
    start = time.perf_counter()
    try:
        response = client.chat.completions.create(**params)
        # The client returns a response object, not a dict: use attribute
        # access instead of subscripting.
        reply = response.choices[0].message.content
    except Exception as e:
        # Top-level UI boundary: report the failure and return no result.
        print(f"Error during API call: {e}")
        return None
    print(f"\n[elapsed: {time.perf_counter() - start:.2f} seconds]")
    return reply
# Gradio UI: one PDF upload as input, one text box for the extracted result
iface = gr.Interface(
    title="Immobilier",
    description="Upload a PDF and extract the required information.",
    fn=process,
    inputs=PDF(interactive=True, label="Upload a PDF"),
    outputs=gr.Textbox(label="Extracted Information"),
)
# Start the web app as soon as this module runs (debug mode enabled)
iface.launch(debug=True)