File size: 1,093 Bytes
2350be5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b4bf28
2350be5
 
 
 
bcc8503
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

import io
import os
from PIL import Image
from azure.storage.blob import ContainerClient


def get_file_from_azure_blob_storage(path):
    """Download a blob and return its content as an in-memory binary stream.

    The container is addressed through the SAS URL stored in the
    ``AZURE_SAS_URL_TRD`` environment variable.

    Args:
        path: Name of the blob inside the container.

    Returns:
        An ``io.BytesIO`` wrapping the complete blob content.

    Raises:
        KeyError: If ``AZURE_SAS_URL_TRD`` is not set in the environment.
    """
    sas_url = os.environ["AZURE_SAS_URL_TRD"]
    # A fresh client is built on every call; using it as a context manager
    # closes its underlying transport instead of leaving that to the GC.
    # The blob is read fully into memory before the client is closed.
    with ContainerClient.from_container_url(sas_url) as container_client:
        blob_client = container_client.get_blob_client(path)
        content = blob_client.download_blob().readall()
    return io.BytesIO(content)


def get_image_from_azure_blob_storage(path):
    """Download an image blob and open it with PIL.

    Args:
        path: Blob path; it is joined onto the ``climateqa/documents/``
            prefix with ``os.path.join`` before the download.

    Returns:
        The object returned by ``PIL.Image.open`` for the downloaded bytes.
    """
    blob_path = os.path.join("climateqa/documents/", path)
    return Image.open(get_file_from_azure_blob_storage(blob_path))

def remove_duplicates_keep_highest_score(documents):
    """Collapse documents sharing a ``doc_id`` to the best-scored one.

    Documents are grouped by ``metadata['doc_id']``. Within a group the
    document with the highest ``metadata['reranking_score']`` is kept; on a
    tie the first one encountered wins. The result preserves the order in
    which each ``doc_id`` was first seen.

    A document whose ``reranking_score`` is missing or ``None`` is ranked
    below every scored document instead of raising ``KeyError`` when a
    duplicate is met.

    Note that documents without a ``doc_id`` all share the key ``None`` and
    are therefore treated as duplicates of each other.

    Args:
        documents: Iterable of objects exposing a ``metadata`` mapping.

    Returns:
        A list with one document per distinct ``doc_id``.
    """
    def _score(doc):
        # Missing/None scores sort below any real score.
        score = doc.metadata.get('reranking_score')
        return float('-inf') if score is None else score

    unique_docs = {}

    for doc in documents:
        doc_id = doc.metadata.get('doc_id')
        kept = unique_docs.get(doc_id)
        # Strict ">" keeps the earliest document when scores are equal.
        if kept is None or _score(doc) > _score(kept):
            unique_docs[doc_id] = doc

    return list(unique_docs.values())