Spaces:

sohojoe
/

soho-clip-embeddings-explorer

Running

File size: 4,910 Bytes

334dcac
 
 
e4bcc80
55f430c
334dcac
 
 
 
55f430c
 
 
 
 
334dcac
 
 
 
 
 
55f430c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2afa949
55f430c
 
 
334dcac
 
2afa949
 
 
55f430c
2afa949
55f430c
 
 
 
2afa949
 
 
e4bcc80
2afa949
334dcac
 
 
55f430c
2afa949
 
 
 
 
 
 
 
 
55f430c
2afa949
55f430c
 
 
 
334dcac
55f430c
334dcac
 
 
 
 
 
 
 
 
 
 
 
2afa949
55f430c
 
 
334dcac
 
55f430c
334dcac
 
 
 
55f430c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2afa949
55f430c

# File name: graph_client.py
from concurrent.futures import ThreadPoolExecutor
import json
import os
import numpy as np
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

import torch

# hack for debugging: uncomment the line below to point the client at another
# server. When HTTP_ADDRESS is unset, the request functions in this file fall
# back to "http://127.0.0.1:8000/".
# os.environ["HTTP_ADDRESS"] = "http://192.168.7.79:8000"

# Fixed sample inputs reused for every request issued by this script.
test_image_url = "https://static.wixstatic.com/media/4d6b49_42b9435ce1104008b1b5f7a3c9bfcd69~mv2.jpg/v1/fill/w_454,h_333,fp_0.50_0.50,q_90/4d6b49_42b9435ce1104008b1b5f7a3c9bfcd69~mv2.jpg"
english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness, it was the epoch of belief"
)

# Model name handed to load_clip() and get_tokenizer() below.
clip_model="ViT-L/14"
# NOTE(review): clip_model_id is not referenced in the visible part of this
# file; confirm whether it is still needed.
clip_model_id ="laion5B-L-14"
# Use the first CUDA device when torch reports one, otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print ("using device", device)
from clip_retrieval.load_clip import load_clip, get_tokenizer 
# from clip_retrieval.clip_client import ClipClient, Modality
# The model is loaded at import time. `preprocess` is the image transform
# used by preprocess_image(); `model` and `tokenizer` are not used in the
# visible part of this file.
model, preprocess = load_clip(clip_model, use_jit=True, device=device)
tokenizer = get_tokenizer(clip_model)

def preprocess_image(image_url):
    """Download an image and run it through the CLIP preprocessing transform.

    Args:
        image_url: URL of the image to download.

    Returns:
        The output of the module-level ``preprocess`` transform with a
        leading batch dimension added (``unsqueeze(0)``) and moved to CPU.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    from PIL import Image
    from io import BytesIO

    # Fetch the URL the caller asked for (previously the global
    # test_image_url was always downloaded, ignoring the argument).
    response = requests.get(image_url)
    # Fail with a clear HTTP error instead of letting PIL choke on an
    # error page that is not image data.
    response.raise_for_status()

    # Force three channels so grayscale/RGBA/palette images are accepted.
    input_image = Image.open(BytesIO(response.content)).convert('RGB')

    prepro = preprocess(input_image).unsqueeze(0).cpu()
    return prepro

# Download and preprocess the sample image once, at import time, so that
# send_preprocessed_image_request() can send the same tensor on every call.
preprocessed_image = preprocess_image(test_image_url)

def send_text_request(number):
    """POST the sample English text to the embedding server.

    The server address is read from the HTTP_ADDRESS environment variable,
    defaulting to "http://127.0.0.1:8000/".

    Args:
        number: Caller-supplied identifier, returned unchanged so results
            can be matched to requests.

    Returns:
        A ``(number, body)`` tuple where ``body`` is the response text.
    """
    endpoint = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
    # The text travels as a multipart form part named "text".
    form_parts = {"text": ('str', english_text, 'application/octet-stream')}
    reply = requests.post(endpoint, files=form_parts)
    return number, reply.text

def send_image_url_request(number):
    """POST the sample image URL to the embedding server.

    The server address is read from the HTTP_ADDRESS environment variable,
    defaulting to "http://127.0.0.1:8000/".

    Args:
        number: Caller-supplied identifier, returned unchanged so results
            can be matched to requests.

    Returns:
        A ``(number, body)`` tuple where ``body`` is the response text.
    """
    endpoint = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
    # Only the URL string is sent, as a multipart form part named "image_url".
    form_parts = {
        "image_url": ('str', test_image_url, 'application/octet-stream'),
    }
    reply = requests.post(endpoint, files=form_parts)
    return number, reply.text

def send_preprocessed_image_request(number):
    """POST the already-preprocessed sample image tensor to the server.

    The tensor is sent as three multipart form parts: the raw bytes under
    "preprocessed_image", plus "shape" and "dtype" parts describing them.
    The server address is read from the HTTP_ADDRESS environment variable,
    defaulting to "http://127.0.0.1:8000/".

    Args:
        number: Caller-supplied identifier, returned unchanged so results
            can be matched to requests.

    Returns:
        A ``(number, body)`` tuple where ``body`` is the response text.
    """
    tensor = preprocessed_image
    raw_data = tensor.numpy().tobytes()
    # NOTE(review): np.array(shape) uses numpy's default integer type, whose
    # width can differ between platforms; presumably the server assumes the
    # same width. Confirm against the server side.
    raw_shape = np.array(tensor.shape).tobytes()
    raw_dtype = str(tensor.dtype).encode()

    form_parts = {
        "preprocessed_image": ('tensor', raw_data, 'application/octet-stream'),
        'shape': ('shape', raw_shape, 'application/octet-stream'),
        'dtype': ('dtype', raw_dtype, 'application/octet-stream'),
    }
    endpoint = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
    reply = requests.post(endpoint, files=form_parts)
    return number, reply.text

def process(numbers, send_func, max_workers=10):
    """Run ``send_func`` for every number on a thread pool and print results.

    Args:
        numbers: Iterable of identifiers; each is passed to ``send_func``.
        send_func: Callable taking one identifier and returning a
            ``(identifier, json_text)`` tuple.
        max_workers: Maximum number of worker threads in the pool.

    For each finished call, in completion order, the JSON text is decoded
    and one line ``"<identifier> : <length of first element>"`` is printed.
    An exception raised by ``send_func`` propagates from here.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(send_func, item) for item in numbers]
        # Handle results as they finish rather than in submission order.
        for done in as_completed(pending):
            n_result, raw_body = done.result()
            result = json.loads(raw_body)
            print (f"{n_result} : {len(result[0])}")

# def process_text(numbers, max_workers=10):
#     for n in numbers:
#         n_result, result = send_text_request(n)
#         result = json.loads(result)
#         print (f"{n_result} : {len(result[0])}")

if __name__ == "__main__":
    # Number of requests issued per benchmark; lower to 1 for a quick check.
    n_calls = 300

    # Each entry is (heading printed after the run, request function to time).
    # The benchmarks run in this order: text, image by URL, image as tensor.
    benchmarks = (
        ("Text...", send_text_request),
        ("Image passing url...", send_image_url_request),
        ("Preprocessed image...", send_preprocessed_image_request),
    )

    for heading, send_func in benchmarks:
        numbers = list(range(n_calls))

        # Wall-clock time for the whole batch, measured with a monotonic clock.
        start_time = time.monotonic()
        process(numbers, send_func)
        end_time = time.monotonic()

        total_time = end_time - start_time
        avg_time_ms = total_time / n_calls * 1000
        calls_per_sec = n_calls / total_time

        print(heading)
        print(f" Average time taken: {avg_time_ms:.2f} ms")
        print(f" Number of calls per second: {calls_per_sec:.2f}")