from transformers import AutoModel, CLIPImageProcessor, CLIPTokenizer
import torch
import spaces  # Hugging Face Spaces ZeroGPU helper; used to decorate GPU-bound functions

model_name_or_path = "BAAI/EVA-CLIP-8B"
image_size = 224  # input resolution expected by the EVA-CLIP image tower


def load_model():
    # EVA-CLIP reuses OpenAI's CLIP ViT-L/14 image preprocessing pipeline
    processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
    model = AutoModel.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    ).to('cuda').eval()
    tokenizer = CLIPTokenizer.from_pretrained(model_name_or_path)
    return model, tokenizer, processor


# Keep the loaded objects instead of discarding the return value
model, tokenizer, processor = load_model()
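
# A minimal usage sketch (not part of the loader above): zero-shot image
# classification with the returned objects. It assumes the remote-code
# EVA-CLIP model exposes encode_image/encode_text, as in BAAI's reference
# snippet; the image path and captions in the example call are placeholders.
from PIL import Image


def classify(model, tokenizer, processor, image_path, captions):
    image = Image.open(image_path)
    input_ids = tokenizer(captions, return_tensors="pt", padding=True).input_ids.to('cuda')
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to('cuda')

    # autocast in bfloat16 to match the dtype the model was loaded with
    with torch.no_grad(), torch.autocast("cuda", dtype=torch.bfloat16):
        image_features = model.encode_image(pixel_values)
        text_features = model.encode_text(input_ids)
        # L2-normalize so the dot product below is a cosine similarity
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

    # Scaled cosine similarities -> probability per caption
    return (100.0 * image_features @ text_features.T).softmax(dim=-1)


# Example call:
# probs = classify(model, tokenizer, processor, "CLIP.png", ["a diagram", "a dog", "a cat"])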