import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO

import numpy as np
import requests
import torch
from PIL import Image

from clip_retrieval.load_clip import load_clip, get_tokenizer


class ClipAppClient:
    """
    Client for an HTTP service that generates embeddings with the OpenAI CLIP model.

    The service address is read from the HTTP_ADDRESS environment variable
    (default: http://127.0.0.1:8000/). All methods return embeddings as
    torch.Tensor objects.

    Example:
        app_client = ClipAppClient()

        test_image_url = "https://example.com/image.jpg"
        preprocessed_image = app_client.preprocess_image(test_image_url)

        text = "A beautiful landscape"
        text_embeddings = app_client.text_to_embedding(text)

        image_embeddings = app_client.image_url_to_embedding(test_image_url)

        preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)
    """

    def __init__(self, clip_model="ViT-L/14", device=None):
        self.clip_model = clip_model
        self.device = device or ("cuda:0" if torch.cuda.is_available() else "cpu")
        print("using device", self.device)
        # Only the preprocessing transform and tokenizer are needed locally;
        # the CLIP model itself runs on the remote service.
        _, self.preprocess = load_clip(clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(clip_model)

    def preprocess_image(self, image_url):
        """
        Load an image from a local file path or URL and apply CLIP preprocessing.

        :param image_url: str, local file path or URL of the image
        :return: torch.Tensor, preprocessed image of shape (1, C, H, W)
        """
        if os.path.isfile(image_url):
            input_image = Image.open(image_url).convert('RGB')
        else:
            response = requests.get(image_url)
            response.raise_for_status()
            input_image = Image.open(BytesIO(response.content)).convert('RGB')
        # Add a batch dimension and keep the tensor on the CPU for serialization.
        return self.preprocess(input_image).unsqueeze(0).cpu()

    def text_to_embedding(self, text):
        """
        Request a text embedding from the CLIP service.

        :param text: str, text to embed
        :return: torch.Tensor, text embedding
        """
        # The service expects multipart form fields, so the text is sent
        # as a file-like field.
        payload = {
            "text": ('str', text, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        embeddings = torch.tensor(json.loads(response.text))
        return embeddings

    def image_url_to_embedding(self, image_url):
        """
        Request an image embedding from the CLIP service for a given image URL.

        :param image_url: str, URL of the image to embed
        :return: torch.Tensor, image embedding
        """
        payload = {
            "image_url": ('str', image_url, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        embeddings = torch.tensor(json.loads(response.text))
        return embeddings

    def preprocessed_image_to_embedding(self, image):
        """
        Request an image embedding from the CLIP service for a preprocessed image.

        :param image: torch.Tensor, preprocessed image (see preprocess_image)
        :return: torch.Tensor, image embedding
        """
        # Serialize the tensor as raw bytes plus the shape and dtype the
        # server needs to reconstruct it.
        key = "preprocessed_image"
        data_bytes = image.numpy().tobytes()
        shape_bytes = np.array(image.shape).tobytes()
        dtype_bytes = str(image.dtype).encode()
        payload = {
            key: ('tensor', data_bytes, 'application/octet-stream'),
            'shape': ('shape', shape_bytes, 'application/octet-stream'),
            'dtype': ('dtype', dtype_bytes, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        embeddings = torch.tensor(json.loads(response.text))
        return embeddings
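

# The module imports ThreadPoolExecutor/as_completed but the class itself is
# synchronous. Below is a minimal sketch of how those imports could be used to
# fetch embeddings for several image URLs in parallel; the helper name and the
# worker count are illustrative, not part of the original API.
def embed_image_urls_concurrently(client, image_urls, max_workers=8):
    """Return {url: embedding} by issuing service requests from worker threads."""
    results = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit one request per URL and collect results as they complete.
        future_to_url = {
            executor.submit(client.image_url_to_embedding, url): url
            for url in image_urls
        }
        for future in as_completed(future_to_url):
            results[future_to_url[future]] = future.result()
    return results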
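
# A runnable usage sketch mirroring the class docstring. It assumes a CLIP
# service is listening at HTTP_ADDRESS (or http://127.0.0.1:8000/); the image
# URL is a placeholder.
if __name__ == "__main__":
    app_client = ClipAppClient()

    test_image_url = "https://example.com/image.jpg"  # placeholder URL
    text = "A beautiful landscape"

    text_embeddings = app_client.text_to_embedding(text)
    print("text embedding shape:", text_embeddings.shape)

    image_embeddings = app_client.image_url_to_embedding(test_image_url)
    print("image embedding shape:", image_embeddings.shape)

    preprocessed_image = app_client.preprocess_image(test_image_url)
    preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)
    print("preprocessed image embedding shape:", preprocessed_image_embeddings.shape)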