ZNV Embedding utilizes a 6B LLM (Large Language Model) for embedding, achieving excellent embedding results.
In a single inference pass, we use two prompts to extract two different embeddings for each sentence and then concatenate them into one vector.
Model usage method:
- Define ZNVEmbeddingModel
import os
from transformers import (
LlamaForCausalLM,
LlamaTokenizer, AutoConfig,
)
import torch
import torch.nn.functional as F
import numpy as np
class ZNVEmbeddingModel(torch.nn.Module):
    """Sentence-embedding model built on top of a LLaMA backbone.

    Every input sentence is prefixed with ``prompt_prefix`` and tokenized;
    the token ids of all ``prompt_suffixes`` are then appended to each row.
    A single pass through the backbone yields the hidden state at the last
    token of each suffix. Each of those states goes through its own linear
    projection; the projections are concatenated, passed through ``tanh``
    and L2-normalised.
    """

    def __init__(self, model_name_or_path):
        """Load the tokenizer, the backbone and the projection layers.

        Args:
            model_name_or_path: Directory holding the LLaMA checkpoint and
                tokenizer files plus a ``linears`` file with the weights of
                the projection layers.
        """
        super().__init__()
        # "Read the following text, then answer the questions"
        self.prompt_prefix = "阅读下文,然后答题\n"
        # "1. Summarise the meaning of the text above in one word:"
        # "2. The deeper meaning of the text above is:"
        self.prompt_suffixes = ["\n1.一个字总结上文的意思是:",
                                "\n2.上文深层次的意思是:"]
        self.hidden_size = 4096
        self.model_name_or_path = model_name_or_path

        # One projection per suffix; each yields an equal share of
        # ``hidden_size`` so the concatenated embedding has
        # ``len(prompt_suffixes) * (hidden_size // len(prompt_suffixes))``
        # dimensions.
        num_suffixes = len(self.prompt_suffixes)
        self.linear_suffixes = torch.nn.ModuleList(
            [torch.nn.Linear(self.hidden_size, self.hidden_size // num_suffixes)
             for _ in range(num_suffixes)])
        self.tokenizer, self.llama = self.load_llama()
        self.tanh = torch.nn.Tanh()

        # Tokenize the suffixes once; forward() appends these ids to every
        # row of the batch.
        flat_ids = []
        self.suffixes_ids_len = []
        for suffix in self.prompt_suffixes:
            ids = self.tokenizer(suffix, return_tensors="pt")["input_ids"].tolist()[0]
            flat_ids += ids
            self.suffixes_ids_len.append(len(ids))
        self.suffixes_len = len(flat_ids)
        self.suffixes_ones = torch.ones(self.suffixes_len)
        self.suffixes_ids = torch.tensor(flat_ids)

        linear_file = os.path.join(model_name_or_path, "linears")
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        # map_location="cpu" lets a file saved from a GPU be read on a
        # CPU-only host; load_state_dict copies the values into the
        # existing parameters either way.
        load_layers = torch.load(linear_file, map_location="cpu")
        # strict=False: the file is expected to hold only a subset of this
        # module's keys, so the remaining parameters are left untouched.
        self.load_state_dict(load_layers, strict=False)

    def load_llama(self):
        """Load the tokenizer and backbone from ``self.model_name_or_path``.

        Returns:
            A ``(tokenizer, model)`` tuple. The tokenizer pads on the left
            and the model has ``use_cache`` disabled.
        """
        llm_path = self.model_name_or_path
        config = AutoConfig.from_pretrained(llm_path)
        tokenizer = LlamaTokenizer.from_pretrained(llm_path)
        # Left padding keeps the real tokens adjacent to the suffix ids that
        # forward() appends on the right.
        tokenizer.padding_side = "left"
        model = LlamaForCausalLM.from_pretrained(
            llm_path,
            config=config,
            low_cpu_mem_usage=True
        )
        model.config.use_cache = False
        return tokenizer, model

    def forward(self, sentences):
        """Embed a batch of sentences.

        Args:
            sentences: A list of strings (a single string is treated as a
                one-element list). Each prefixed sentence is truncated to
                at most 256 tokens before the suffix ids are appended.

        Returns:
            A tensor with one L2-normalised embedding per sentence, on the
            device of this module's parameters.
        """
        if isinstance(sentences, str):
            # A bare string would otherwise be iterated character by
            # character.
            sentences = [sentences]
        prompts = [self.prompt_prefix + s for s in sentences]
        inputs = self.tokenizer(prompts, max_length=256, padding=True, truncation=True,
                                return_tensors='pt')
        batch_size = len(prompts)
        device = next(self.parameters()).device

        # Append the suffix ids (and a matching all-ones mask) to every row.
        suffixes_ones = self.suffixes_ones.unsqueeze(0).repeat(batch_size, 1)
        attention_mask = torch.cat([inputs["attention_mask"], suffixes_ones], dim=-1).to(device)
        suffixes_ids = self.suffixes_ids.unsqueeze(0).repeat(batch_size, 1)
        input_ids = torch.cat([inputs["input_ids"], suffixes_ids], dim=-1).to(device)

        last_hidden_state = self.llama.base_model.base_model(
            attention_mask=attention_mask, input_ids=input_ids).last_hidden_state

        # Walk backwards from the final position: the first projection reads
        # the last token of the last suffix, the next one reads the last
        # token of the suffix before it, and so on.
        prompts_embeddings = []
        index = -1
        for linear, suffix_len in zip(self.linear_suffixes,
                                      reversed(self.suffixes_ids_len)):
            prompts_embeddings.append(linear(last_hidden_state[:, index, :]))
            index -= suffix_len

        output_embedding = torch.cat(prompts_embeddings, dim=-1)
        output_embedding = self.tanh(output_embedding)
        output_embedding = F.normalize(output_embedding, p=2, dim=1)
        return output_embedding

    def encode(self, sentences, batch_size=10, **kwargs):
        """Embed any number of sentences in mini-batches, as a NumPy array.

        Gradient tracking is disabled for the duration of the call, so this
        method works without an enclosing ``torch.no_grad()`` block.

        Args:
            sentences: A list of strings (a single string is treated as a
                one-element list).
            batch_size: Number of sentences per forward pass; must be >= 1.
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            A 2-D ``numpy.ndarray`` with one row per sentence, or ``None``
            when ``sentences`` is empty.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        if isinstance(sentences, str):
            sentences = [sentences]

        chunks = []
        with torch.no_grad():
            for start in range(0, len(sentences), batch_size):
                batch = sentences[start:start + batch_size]
                # detach() guards against a forward() result that still
                # carries autograd history; numpy() rejects such tensors.
                chunks.append(self.forward(batch).detach().cpu().numpy())

        if not chunks:
            return None
        # Concatenate once instead of re-copying the result for every batch.
        return np.concatenate(chunks, axis=0)
- Use ZNVEmbeddingModel for Embedding.
# Load the model from a local directory and switch it to inference mode.
znv_model = ZNVEmbeddingModel("your_model_path")
znv_model.eval()

# Two paraphrases: "What is your phone number?" /
# "Can you tell me your mobile number?"
sentences = ["请问你的电话号码是多少?", "可以告诉我你的手机号吗?"]

with torch.no_grad():
    output = znv_model(sentences)
    first, second = output[0], output[1]
    # Cosine similarity between the two sentence embeddings.
    cos_sim = F.cosine_similarity(first, second, dim=0)
    print(cos_sim)
- Downloads last month
- 630
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.
Spaces using sentosa/ZNV-Embedding 2
Evaluation results
- cos_sim_pearson on MTEB AFQMC validation set (self-reported): 53.669
- cos_sim_spearman on MTEB AFQMC validation set (self-reported): 53.841
- euclidean_pearson on MTEB AFQMC validation set (self-reported): 53.582
- euclidean_spearman on MTEB AFQMC validation set (self-reported): 53.841
- manhattan_pearson on MTEB AFQMC validation set (self-reported): 53.646
- manhattan_spearman on MTEB AFQMC validation set (self-reported): 53.755
- cos_sim_pearson on MTEB ATEC test set (self-reported): 58.124
- cos_sim_spearman on MTEB ATEC test set (self-reported): 54.443
- euclidean_pearson on MTEB ATEC test set (self-reported): 61.206
- euclidean_spearman on MTEB ATEC test set (self-reported): 54.443