Gengzigang committed
Commit: d683b1e • 1 Parent(s): f489540
update
README.md CHANGED
@@ -70,14 +70,14 @@ processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")
 model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-336" # or /path/to/local/LLM2CLIP-Openai-L-14-336
 model = AutoModel.from_pretrained(
     model_name_or_path,
-    torch_dtype=torch.
+    torch_dtype=torch.bfloat16,
     trust_remote_code=True).to('cuda').eval()
 
 llm_model_name = 'microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned'
 config = AutoConfig.from_pretrained(
     llm_model_name, trust_remote_code=True
 )
-llm_model = AutoModel.from_pretrained(llm_model_name, config=config,trust_remote_code=True)
+llm_model = AutoModel.from_pretrained(llm_model_name, torch_dtype=torch.bfloat16, config=config, trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
 llm_model.config._name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct' # Workaround for LLM2VEC
 l2v = LLM2Vec(llm_model, tokenizer, pooling_mode="mean", max_length=512, doc_max_length=512)
@@ -87,10 +87,10 @@ image_path = "CLIP.png"
 
 image = Image.open(image_path)
 input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')
+text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')
 
 with torch.no_grad(), torch.cuda.amp.autocast():
     image_features = model.get_image_features(input_pixels)
-    text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')
     text_features = model.get_text_features(text_features)
 
 image_features /= image_features.norm(dim=-1, keepdim=True)
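For reference, a minimal sketch of how the README snippet reads after this change: both models are loaded in bfloat16 and the LLM2Vec text encoding now runs before the `no_grad`/autocast block. The imports, the example `captions` list, and the final similarity computation are assumptions added here for completeness; they are not part of this diff.

```python
# Sketch of the updated usage, assuming standard imports and an example captions list.
import torch
from PIL import Image
from transformers import AutoModel, AutoConfig, AutoTokenizer, CLIPImageProcessor
from llm2vec import LLM2Vec

processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")

model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-336"  # or /path/to/local/LLM2CLIP-Openai-L-14-336
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,            # CLIP tower loaded in bfloat16 after this commit
    trust_remote_code=True).to('cuda').eval()

llm_model_name = 'microsoft/LLM2CLIP-Llama-3-8B-Instruct-CC-Finetuned'
config = AutoConfig.from_pretrained(llm_model_name, trust_remote_code=True)
llm_model = AutoModel.from_pretrained(
    llm_model_name, torch_dtype=torch.bfloat16, config=config, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model.config._name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct'  # Workaround for LLM2VEC
l2v = LLM2Vec(llm_model, tokenizer, pooling_mode="mean", max_length=512, doc_max_length=512)

captions = ["a diagram", "a dog", "a cat"]  # assumed example captions, not shown in the diff
image_path = "CLIP.png"

image = Image.open(image_path)
input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')
# After this commit, the LLM2Vec encoding happens before the no_grad/autocast block.
text_features = l2v.encode(captions, convert_to_tensor=True).to('cuda')

with torch.no_grad(), torch.cuda.amp.autocast():
    image_features = model.get_image_features(input_pixels)
    text_features = model.get_text_features(text_features)

    # Illustrative similarity step (not part of this diff): normalize and
    # compute caption probabilities for the image.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)
```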