Gengzigang committed
Commit d683b1e
1 Parent(s): f489540
Files changed (1): README.md (+3, -3)
README.md CHANGED

@@ -70,14 +70,14 @@ processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-336" # or /path/to/local/LLM2CLIP-Openai-L-14-336
model = AutoModel.from_pretrained(
    model_name_or_path,
-    torch_dtype=torch.float16,
+    torch_dtype=torch.bfloat16,
    trust_remote_code=True).to('cuda').eval()

llm_model_name = 'microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned'
config = AutoConfig.from_pretrained(
    llm_model_name, trust_remote_code=True
)
-llm_model = AutoModel.from_pretrained(llm_model_name, config=config, trust_remote_code=True)
+llm_model = AutoModel.from_pretrained(llm_model_name, torch_dtype=torch.bfloat16, config=config, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model.config._name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct' # Workaround for LLM2VEC
l2v = LLM2Vec(llm_model, tokenizer, pooling_mode="mean", max_length=512, doc_max_length=512)

@@ -87,10 +87,10 @@ image_path = "CLIP.png"

image = Image.open(image_path)
input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')
+text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')

with torch.no_grad(), torch.cuda.amp.autocast():
    image_features = model.get_image_features(input_pixels)
-    text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')
    text_features = model.get_text_features(text_features)

    image_features /= image_features.norm(dim=-1, keepdim=True)
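
For reference, a minimal end-to-end sketch of how the snippet might read after this commit. The imports, the example `captions` list, and the final normalization/similarity step are assumptions based on typical CLIP-style usage, not part of the diff itself; only the model loading and feature-extraction lines come from the README.

```python
# Sketch of the updated usage, assuming standard imports and a CLIP-style
# similarity readout at the end (both are assumptions, not from the commit).
import torch
from PIL import Image
from transformers import AutoModel, AutoConfig, AutoTokenizer, CLIPImageProcessor
from llm2vec import LLM2Vec  # assumed source of the LLM2Vec wrapper used in the README

processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")

# Vision/CLIP model, loaded in bfloat16 as introduced by this commit.
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-336"
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True).to('cuda').eval()

# Fine-tuned Llama-3 text encoder, also in bfloat16 after this commit.
llm_model_name = 'microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned'
config = AutoConfig.from_pretrained(llm_model_name, trust_remote_code=True)
llm_model = AutoModel.from_pretrained(
    llm_model_name, torch_dtype=torch.bfloat16, config=config, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model.config._name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct'  # Workaround for LLM2VEC
l2v = LLM2Vec(llm_model, tokenizer, pooling_mode="mean", max_length=512, doc_max_length=512)

captions = ["a diagram", "a dog", "a cat"]  # assumed example captions
image = Image.open("CLIP.png")
input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')
# Text encoding now happens before the no_grad/autocast block, as in the commit.
text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')

with torch.no_grad(), torch.cuda.amp.autocast():
    image_features = model.get_image_features(input_pixels)
    text_features = model.get_text_features(text_features)

    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Assumed final step: cosine-similarity logits turned into label probabilities.
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)
```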