from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity as cos_sim

model_name = "jinaai/jina-embedding-b-en-v1"
model = SentenceTransformer(model_name)

# Load the test split of the negation dataset; each row contains an anchor
# sentence, an entailed (positive) sentence, and a negated sentence.
dataset = load_dataset('jinaai/negation-dataset', split='test')

# Encode each column into a (num_rows, dim) tensor of embeddings.
anchor_embeddings = model.encode([item['anchor'] for item in dataset], convert_to_tensor=True)
entailment_embeddings = model.encode([item['entailment'] for item in dataset], convert_to_tensor=True)
negative_embeddings = model.encode([item['negative'] for item in dataset], convert_to_tensor=True)

# Row-wise cosine similarities between the paired sentences.
positive_similarities = cos_sim(anchor_embeddings, entailment_embeddings)
entailment_negatives = cos_sim(negative_embeddings, entailment_embeddings)
anchor_negatives = cos_sim(anchor_embeddings, negative_embeddings)

# Fraction of rows where the anchor-entailment pair is more similar than the
# negative-entailment pair, and than the anchor-negative pair, respectively.
entailment_score = (positive_similarities > entailment_negatives).sum().item() / len(anchor_embeddings)
anchor_score = (positive_similarities > anchor_negatives).sum().item() / len(anchor_embeddings)

print('entailment_score: ', entailment_score)
print('anchor_score: ', anchor_score)