File size: 1,093 Bytes
2350be5 4b4bf28 2350be5 bcc8503 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import io
import os
from PIL import Image
from azure.storage.blob import ContainerClient
def get_file_from_azure_blob_storage(path):
AZURE_SAS_URL_TRD = os.environ["AZURE_SAS_URL_TRD"]
container_client = ContainerClient.from_container_url(AZURE_SAS_URL_TRD)
blob_client = container_client.get_blob_client(path)
stream = blob_client.download_blob().readall()
file_object = io.BytesIO(stream)
return file_object
def get_image_from_azure_blob_storage(path):
base_path = "climateqa/documents/"
path = os.path.join(base_path, path)
file_object = get_file_from_azure_blob_storage(path)
image = Image.open(file_object)
return image
def remove_duplicates_keep_highest_score(documents):
unique_docs = {}
for doc in documents:
doc_id = doc.metadata.get('doc_id')
if doc_id in unique_docs:
if doc.metadata['reranking_score'] > unique_docs[doc_id].metadata['reranking_score']:
unique_docs[doc_id] = doc
else:
unique_docs[doc_id] = doc
return list(unique_docs.values())
|