ZNV Embedding uses a 6B-parameter large language model (LLM) as its encoder and achieves excellent embedding quality.
In a single forward pass, two prompts are appended to the sentence to extract two different embeddings, which are then concatenated into the final vector.
How to use the model:
- Define ZNVEmbeddingModel
import os
from transformers import (
LlamaForCausalLM,
LlamaTokenizer, AutoConfig,
)
import torch
import torch.nn.functional as F
import numpy as np
class ZNVEmbeddingModel(torch.nn.Module):
    """Sentence-embedding model: a LLaMA backbone read out through two prompt heads.

    Every sentence is wrapped as ``prompt_prefix + sentence`` and followed by
    all entries of ``prompt_suffixes``. The backbone's hidden state at the last
    token of each suffix is projected by its own linear layer; the projections
    are concatenated, passed through ``tanh`` and L2-normalised.
    """

    def __init__(self, model_name_or_path):
        """Load the tokenizer, the backbone and the projection heads.

        Args:
            model_name_or_path: Location handed to ``from_pretrained`` for the
                tokenizer/backbone. It is also joined with ``"linears"`` to
                locate the checkpoint holding the projection-head weights.
        """
        super().__init__()
        # Prompt text is reproduced exactly; the shipped weights presumably
        # depend on these exact strings.
        self.prompt_prefix = "阅读下文,然后答题\n"
        self.prompt_suffixes = ["\n1.一个字总结上文的意思是:",
                                "\n2.上文深层次的意思是:"]
        self.hidden_size = 4096
        self.model_name_or_path = model_name_or_path

        # One projection head per suffix; their outputs are concatenated, so
        # each head emits an equal share of ``hidden_size``.
        head_width = self.hidden_size // len(self.prompt_suffixes)
        self.linear_suffixes = torch.nn.ModuleList(
            [torch.nn.Linear(self.hidden_size, head_width)
             for _ in self.prompt_suffixes])
        self.tokenizer, self.llama = self.load_llama()
        self.tanh = torch.nn.Tanh()

        # Tokenise every suffix once up front. Each suffix is tokenised on its
        # own, so whatever special tokens the tokenizer adds per call are part
        # of the stored ids (NOTE(review): presumably a BOS token - confirm).
        suffix_token_ids = []
        self.suffixes_ids_len = []
        for suffix in self.prompt_suffixes:
            ids = self.tokenizer(suffix, return_tensors="pt")["input_ids"].tolist()[0]
            suffix_token_ids.extend(ids)
            self.suffixes_ids_len.append(len(ids))
        self.suffixes_len = len(suffix_token_ids)
        self.suffixes_ones = torch.ones(self.suffixes_len)
        self.suffixes_ids = torch.tensor(suffix_token_ids)

        linear_file = os.path.join(model_name_or_path, "linears")
        # NOTE(review): torch.load unpickles the file - only load checkpoints
        # from a trusted source.
        # map_location="cpu" lets a checkpoint saved on a GPU load on a
        # CPU-only host; load_state_dict then copies into the parameters
        # wherever they live.
        load_layers = torch.load(linear_file, map_location="cpu")
        # Load only the checkpoint's entries. strict=False tolerates the
        # backbone keys being absent, which avoids copying the whole backbone
        # state dict onto itself.
        self.load_state_dict(load_layers, strict=False)

    def load_llama(self):
        """Load the tokenizer and causal-LM backbone from ``model_name_or_path``.

        Returns:
            A ``(tokenizer, model)`` tuple. The tokenizer pads on the left and
            the model has ``use_cache`` disabled.
        """
        llm_path = self.model_name_or_path
        config = AutoConfig.from_pretrained(llm_path)
        tokenizer = LlamaTokenizer.from_pretrained(llm_path)
        # Left padding keeps the real tokens adjacent to the suffixes that
        # forward() appends on the right.
        tokenizer.padding_side = "left"
        model = LlamaForCausalLM.from_pretrained(
            llm_path,
            config=config,
            low_cpu_mem_usage=True,
        )
        model.config.use_cache = False
        return tokenizer, model

    def forward(self, sentences):
        """Embed a batch of sentences.

        Args:
            sentences: Sequence of strings (``len()`` must be supported).

        Returns:
            A 2-D tensor with one L2-normalised row per sentence; its width is
            the sum of the projection heads' output widths.
        """
        prompts = [self.prompt_prefix + s for s in sentences]
        # Truncation to 256 tokens applies to prefix + sentence only; the
        # suffix tokens are appended afterwards and are never truncated.
        inputs = self.tokenizer(prompts, max_length=256, padding=True, truncation=True,
                                return_tensors='pt')
        attention_mask = inputs["attention_mask"]
        input_ids = inputs["input_ids"]
        batch_size = len(prompts)
        device = next(self.parameters()).device

        # Every suffix token is attended to. Cast the ones to the tokenizer's
        # mask dtype so torch.cat does not promote the mask to float.
        suffix_mask = self.suffixes_ones.to(attention_mask.dtype)
        suffix_mask = suffix_mask.unsqueeze(0).expand(batch_size, -1)
        attention_mask = torch.cat([attention_mask, suffix_mask], dim=-1).to(device)

        suffix_ids = self.suffixes_ids.unsqueeze(0).expand(batch_size, -1)
        input_ids = torch.cat([input_ids, suffix_ids], dim=-1).to(device)

        last_hidden_state = self.llama.base_model.base_model(attention_mask=attention_mask, input_ids=input_ids).last_hidden_state

        # Walk backwards through the sequence: position -1 is the last token of
        # the final suffix, and stepping back by that suffix's length lands on
        # the last token of the suffix before it.
        # NOTE(review): head 0 is therefore paired with the LAST suffix. Kept
        # as-is because the shipped head weights presumably rely on it.
        prompts_embeddings = []
        index = -1
        for head, suffix_len in zip(self.linear_suffixes, reversed(self.suffixes_ids_len)):
            prompts_embeddings.append(head(last_hidden_state[:, index, :]))
            index -= suffix_len

        output_embedding = torch.cat(prompts_embeddings, dim=-1)
        output_embedding = self.tanh(output_embedding)
        output_embedding = F.normalize(output_embedding, p=2, dim=1)
        return output_embedding

    def encode(self, sentences, batch_size=10, **kwargs):
        """Embed ``sentences`` in mini-batches and return a NumPy array.

        Args:
            sentences: Sliceable sequence of strings.
            batch_size: Number of sentences per forward pass; must be >= 1.
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            A 2-D ``numpy.ndarray`` with one row per sentence, or ``None`` when
            ``sentences`` is empty.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        chunks = []
        # Inference only: without no_grad the output still requires grad and
        # Tensor.numpy() raises; it also avoids keeping the autograd graph.
        with torch.no_grad():
            for start in range(0, len(sentences), batch_size):
                batch = sentences[start:start + batch_size]
                chunks.append(self.forward(batch).cpu().numpy())

        if not chunks:
            return None
        # Concatenate once at the end instead of re-copying the growing result
        # after every batch.
        return np.concatenate(chunks, axis=0)
- Use ZNVEmbeddingModel to compute embeddings.
# Build the model from a checkpoint directory and put it in inference mode.
znv_model = ZNVEmbeddingModel("your_model_path")
znv_model.eval()

# Embed two questions that both ask for a phone number and print the cosine
# similarity of their embeddings. Gradients are not needed here.
with torch.no_grad():
    output = znv_model(
        [
            "请问你的电话号码是多少?",
            "可以告诉我你的手机号吗?",
        ]
    )
    first_vec, second_vec = output[0], output[1]
    cos_sim = F.cosine_similarity(first_vec, second_vec, dim=0)
    print(cos_sim)
- Downloads last month
- 480
Inference Providers
NEW
This model is not currently available via any of the supported third-party Inference Providers, and
the model is not deployed on the HF Inference API.
Spaces using sentosa/ZNV-Embedding 2
Evaluation results
- cos_sim_pearson on MTEB AFQMC validation set (self-reported): 53.669
- cos_sim_spearman on MTEB AFQMC validation set (self-reported): 53.841
- euclidean_pearson on MTEB AFQMC validation set (self-reported): 53.582
- euclidean_spearman on MTEB AFQMC validation set (self-reported): 53.841
- manhattan_pearson on MTEB AFQMC validation set (self-reported): 53.646
- manhattan_spearman on MTEB AFQMC validation set (self-reported): 53.755
- cos_sim_pearson on MTEB ATEC test set (self-reported): 58.124
- cos_sim_spearman on MTEB ATEC test set (self-reported): 54.443
- euclidean_pearson on MTEB ATEC test set (self-reported): 61.206
- euclidean_spearman on MTEB ATEC test set (self-reported): 54.443