File size: 8,325 Bytes
fabc2f7
 
 
7019ca9
 
fabc2f7
7019ca9
 
fabc2f7
 
 
 
7019ca9
fabc2f7
7019ca9
 
a868541
 
 
 
 
 
7019ca9
fabc2f7
7019ca9
 
 
fabc2f7
7019ca9
 
 
 
 
 
fabc2f7
7019ca9
 
 
 
 
 
 
 
 
 
 
fabc2f7
7019ca9
 
 
 
 
fabc2f7
7019ca9
 
 
fabc2f7
7019ca9
 
 
fabc2f7
7019ca9
 
 
 
fabc2f7
7019ca9
fabc2f7
7019ca9
 
 
 
 
 
 
fabc2f7
7019ca9
 
 
 
 
 
fabc2f7
 
 
7019ca9
 
fabc2f7
7019ca9
 
fabc2f7
7019ca9
 
 
 
 
fabc2f7
7019ca9
 
 
 
 
 
fabc2f7
7019ca9
 
fabc2f7
7019ca9
 
 
 
 
 
 
 
fabc2f7
7019ca9
 
 
 
 
fabc2f7
7019ca9
 
 
 
 
 
 
 
 
 
fabc2f7
7019ca9
19c4633
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d0e7c9
 
19c4633
 
 
 
3d0e7c9
19c4633
 
7019ca9
 
fabc2f7
7019ca9
 
 
fabc2f7
7019ca9
fabc2f7
 
 
 
 
 
7019ca9
 
 
 
868ba24
fabc2f7
7019ca9
 
 
 
fabc2f7
 
7019ca9
fabc2f7
7019ca9
 
fabc2f7
 
 
7019ca9
 
fabc2f7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import os  # Included in Python's standard library
from openai import OpenAI  # Official OpenAI Python package
from IPython.display import Audio  # Included in Python's standard library

## supporting functions
import base64, textwrap, time, io
from PIL import Image  # Pillow image library
import tempfile
from pdf2image import convert_from_path  # Convert PDFs to images
import gradio as gr  # For building a UI
from gradio_pdf import PDF  # Gradio PDF component
from dotenv import load_dotenv  # To load environment variables from .env file

# Load environment variables (e.g. from a local .env file) once at start-up
load_dotenv()

# Read the API key from OPENAI_API_KEY; the previous lookup used the
# misspelled name 'OPENAI_API_KE'.
openai_api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=openai_api_key)


# Resize the image if it exceeds the max dimension
def resize_image(image, max_dimension):
    """Return *image* scaled so that neither side exceeds *max_dimension*.

    Palette-mode ("P") images are converted first: to RGBA when their info
    carries a "transparency" entry, otherwise to RGB. The aspect ratio is
    preserved, and images already within the limit are not resized.

    Args:
        image: Image object exposing ``size``, ``mode``, ``info``,
            ``convert`` and ``resize`` (used here as a Pillow image).
        max_dimension: Largest allowed width or height, in pixels.

    Returns:
        The converted and/or resized image, or the input image itself when
        no conversion or resize was needed.
    """
    width, height = image.size

    # Convert palette images to RGB or RGBA (for images with transparency)
    if image.mode == "P":
        if "transparency" in image.info:
            image = image.convert("RGBA")
        else:
            image = image.convert("RGB")

    # Resize the image if it exceeds the max dimension
    if width > max_dimension or height > max_dimension:
        if width > height:
            new_width = max_dimension
            new_height = int(height * (max_dimension / width))
        else:
            new_height = max_dimension
            new_width = int(width * (max_dimension / height))
        # int() truncates, so a very elongated image could end up with a
        # zero-length side; keep at least one pixel on each axis.
        new_width = max(1, new_width)
        new_height = max(1, new_height)
        image = image.resize((new_width, new_height), Image.LANCZOS)

    return image

# Convert the image to PNG format and return it as a byte stream
def convert_to_png(image):
    """Serialise *image* as PNG and return the encoded bytes.

    Args:
        image: Object with a ``save(fp, format=...)`` method (used here as a
            Pillow image).

    Returns:
        The PNG-encoded content as ``bytes``.
    """
    buffer = io.BytesIO()
    try:
        image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    finally:
        # Release the in-memory buffer whether or not saving succeeded
        buffer.close()
    return png_bytes

# Process the image (resize and convert if necessary)
def process_image(path, max_size):
    """Load the image at *path* and return it base64-encoded with its size.

    A PNG file whose sides are both within *max_size* is encoded straight
    from disk. Anything else is passed through ``resize_image`` and
    ``convert_to_png`` before encoding.

    Args:
        path: Location of the image file.
        max_size: Largest allowed width or height, in pixels.

    Returns:
        A ``(base64_string, largest_side)`` tuple, where ``largest_side`` is
        the larger dimension of the image that was encoded.
    """
    with Image.open(path) as source:
        src_width, src_height = source.size
        mimetype = Image.MIME.get(source.format)
        usable_as_is = (
            mimetype == "image/png"
            and src_width <= max_size
            and src_height <= max_size
        )

        if not usable_as_is:
            scaled = resize_image(source, max_size)
            payload = base64.b64encode(convert_to_png(scaled))
            return (payload.decode('utf-8'), max(scaled.size))

        # Already a small enough PNG: send the file bytes untouched
        with open(path, "rb") as raw:
            payload = base64.b64encode(raw.read())
        return (payload.decode('utf-8'), max(src_width, src_height))

# Create the image content metadata
def create_image_content(image, maxdim, detail_threshold):
    """Build an ``image_url`` content part for a base64-encoded image.

    Args:
        image: Base64 string of the image bytes.
        maxdim: Largest side of the image, in pixels.
        detail_threshold: Images whose largest side is below this value are
            tagged with "low" detail; all others with "high".

    Returns:
        A dict with ``type`` "image_url" and an ``image_url`` entry holding
        the data URL and the chosen detail level.
    """
    detail = "low" if maxdim < detail_threshold else "high"
    return {
        "type": "image_url",
        # process_image always yields PNG bytes, so the data URL must declare
        # image/png rather than image/jpeg.
        "image_url": {"url": f"data:image/png;base64,{image}", "detail": detail}
    }

# Set the system message
def set_system_message(sysmsg):
    """Wrap *sysmsg* in a one-element list holding a system-role message.

    Args:
        sysmsg: Content to place in the system message.

    Returns:
        A list containing a single dict with "role" and "content" keys.
    """
    system_entry = {"role": "system", "content": sysmsg}
    return [system_entry]

# Set user message along with attached images
def set_user_message(user_msg_str, file_path_list=None, max_size_px=1024, file_names_list=None, tiled=False, detail_threshold=700):
    """Build the user message, optionally with attached images.

    Args:
        user_msg_str: Text of the user message.
        file_path_list: List of image file paths to attach. Any value that is
            not a list (including the default ``None``) means no attachments.
        max_size_px: Largest allowed image side, passed to ``process_image``.
        file_names_list: Optional display names for the images; only used
            when its length matches ``file_path_list``, otherwise the base
            names of the paths are used.
        tiled: When true (and at least one file is attached) each image is
            emitted as a typed ``image_url`` content part; otherwise images
            are emitted as ``{"image": <base64>}`` entries after the text.
        detail_threshold: Pixel threshold forwarded to
            ``create_image_content`` in the tiled case.

    Returns:
        A one-element list containing the user-role message dict.
    """
    # A None default avoids sharing one mutable list object across calls
    if not isinstance(file_path_list, list):
        file_path_list = []

    if not file_path_list:  # No files means no tiling
        tiled = False

    # Prefer caller-supplied names only when there is one per file
    if file_names_list and len(file_names_list) == len(file_path_list):
        file_names = file_names_list
    else:
        file_names = [os.path.basename(path) for path in file_path_list]

    # Each entry is a (base64_string, largest_side) tuple
    base64_images = [process_image(path, max_size_px) for path in file_path_list]

    uploaded_images_text = ""
    if file_names:
        uploaded_images_text = "\n\n---\n\nUploaded images:\n" + '\n'.join(file_names)

    message_text = user_msg_str + uploaded_images_text

    if tiled:
        content = [{"type": "text", "text": message_text}]
        content += [create_image_content(image, maxdim, detail_threshold) for image, maxdim in base64_images]
        return [{"role": "user", "content": content}]

    # NOTE(review): this branch sends a bare string plus {"image": ...} dicts
    # rather than the typed parts used above - confirm the target API accepts it.
    return [{
        "role": "user",
        "content": [message_text] + [{"image": image} for image, _ in base64_images]
    }]

# Directory containing the Poppler binaries (for PDF to image conversion);
# adjust this path if needed
poppler_path = '/usr/bin'

# Append that directory to the process PATH
os.environ['PATH'] = os.environ['PATH'] + os.pathsep + poppler_path

# Convert a PDF to images
def pdf_to_images(pdf_path, dpi=300, output_format='JPEG'):
    """Render every page of a PDF to an image file in a new temp directory.

    Args:
        pdf_path: Path of the PDF, forwarded to ``convert_from_path``.
        dpi: Rendering resolution, forwarded to ``convert_from_path``.
        output_format: Format name given to each page's ``save`` call; its
            lower-cased form is used as the file extension.

    Returns:
        The list of saved image paths, in page order (``page0``, ``page1``, ...).
    """
    # NOTE(review): the temporary directory is not removed in this function;
    # cleanup, if any, is left to the caller.
    out_dir = tempfile.mkdtemp()
    saved_paths = []
    for index, rendered in enumerate(convert_from_path(pdf_path, dpi)):
        target = os.path.join(out_dir, f'page{index}.{output_format.lower()}')
        rendered.save(target, output_format)
        saved_paths.append(target)
    return saved_paths

# System prompt sent with every request: lists the fields to extract from a
# French syndic "appel de fonds" document, the validation rules to apply, and
# the expected JSON output. Leading/trailing whitespace is stripped.
system_msg = """
You are an intelligent assistant tasked with extracting and validating information from French real estate syndic documents (*appel de fonds*). These documents contain financial details, property information, and owner details. Your job is to extract and ensure the correctness of the following information:

### Task Overview:
You need to extract and validate the following fields:
1. **Total à payer**: The total amount the owner must pay for the period.
2. **Fond travaux alur**: The amount allocated to the ALUR works fund.
3. **Total Part charges prévisionnelles**: The forecasted portion of charges the owner must pay for general building maintenance, collective services, etc.
4. **Part autres travaux**: Any additional expenses related to specific works or repairs.
5. **le solde précédent**: The previous balance from past transactions (can be positive or negative).
6. **Propriétaire**: The name of the property owner.
7. **Adresse du propriétaire**: The postal address of the owner.
8. **Adresse du bien**: The location of the property (address of the unit or building).
9. **Référence**: The reference number of the document or account related to the property.
10. **Date du document**: The date when the document was issued.
11. **Date limite du paiement**: The deadline by which the payment must be made.
12. **Montant total solde en notre faveur**: The total balance in favor of the syndic (if applicable).

### Validation Rule:
The following validation rules must be respected:
- **Total à payer** = **Fond travaux alur** + **Total Part charges prévisionnelles** + **Part autres travaux**.
- The amounts should be taken from the "débit" column, not the "crédit" column, to ensure accuracy. Verify that the **Total à payer** is from the correct column (débit).
- Additionally, both **Total à payer** and **Montant total solde en notre faveur** should be extracted for a cross-check to ensure that the final amounts are accurate and reflect the correct financial state.

### Format for Output:
Return the extracted information in JSON format. If there is a discrepancy (such as a mismatch between amounts or amounts found in the wrong column), return an error message in JSON format explaining the issue.
""".strip()

# Define process function to convert PDF and extract information
def process(pdf):
    """Send the pages of a PDF to the chat model and return its reply.

    The PDF is rendered to page images, which are attached to an empty user
    message and sent together with the module-level system prompt.

    Args:
        pdf: The PDF to analyse, passed to ``pdf_to_images`` as its path.

    Returns:
        The text content of the first choice in the response, or ``None``
        when the request (or reading the response) fails; the error is
        printed in that case.
    """
    image_paths = pdf_to_images(pdf)
    system = set_system_message(system_msg)
    # NOTE(review): without tiled=True the images are sent as {"image": ...}
    # entries rather than typed image_url parts - confirm the API accepts this.
    user = set_user_message("", file_path_list=image_paths, max_size_px=1024)

    params = {
        "model": "gpt-4o",
        "temperature": 0.01,
        "max_tokens": 500,
        "stream": False,
        "messages": system + user
    }

    start = time.perf_counter()
    try:
        response = client.chat.completions.create(**params)
        # The client returns a response object, not a dict: subscripting it
        # raised TypeError, which the handler below turned into a None result.
        reply = response.choices[0].message.content
    except Exception as e:
        print(f"Error during API call: {e}")
        return None

    print(f"\n[elapsed: {time.perf_counter() - start:.2f} seconds]")
    return reply

# Gradio UI: a PDF upload as input, a text box showing the result of process()
pdf_input = PDF(label="Upload a PDF", interactive=True)
result_box = gr.Textbox(label="Extracted Information")

iface = gr.Interface(
    fn=process,
    inputs=pdf_input,
    outputs=result_box,
    title="Immobilier",
    description="Upload a PDF and extract the required information.",
)

# Start the app with debug output enabled
iface.launch(debug=True)