embedding-models
Collection
7 items
•
Updated
ONNX variant of BAAI/bge-m3
This model was converted to ONNX with the onnx-community/convert-to-onnx Space (spaces/onnx-community/convert-to-onnx).
Use the code below to get started with the model.
from huggingface_hub import snapshot_download
# Fetch the files of the converted ONNX repository from the Hugging Face Hub.
# The return value of the call is not kept here.
snapshot_download(repo_id="philipp-zettl/BAAI-bge-m3-ONNX")
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer
# Tokenizer and ONNX Runtime model are both loaded from the same repository id,
# so it is spelled out once and reused.
MODEL_ID = "philipp-zettl/BAAI-bge-m3-ONNX"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = ORTModelForFeatureExtraction.from_pretrained(MODEL_ID)
Example: Similarity Search
import torch
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring masked positions.

    Args:
        model_output: Indexable model result whose element ``0`` holds the
            per-token embeddings (assumed to be shaped
            ``(batch, tokens, hidden)`` -- TODO confirm against the model).
        attention_mask: Tensor that is broadcast over the last dimension of
            the token embeddings; positions with value 0 do not contribute.

    Returns:
        The masked sum over dimension 1 divided by the number of unmasked
        positions.
    """
    token_embeddings = model_output[0]
    # Give the mask a trailing axis and stretch it to the embedding shape so it
    # can be multiplied element-wise with the token embeddings.
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, dim=1)
    # Clamp the divisor so a fully masked row yields zeros instead of NaN.
    counts = torch.clamp(mask.sum(dim=1), min=1e-9)
    return summed / counts
def embed(text, *, pooling="cls"):
    """Embed one text into an L2-normalised sentence vector.

    BGE-M3 defines its dense representation as the normalised hidden state of
    the first ([CLS]) token, so that is the default here. Pass
    ``pooling="mean"`` to get the masked mean over all tokens instead (the
    behaviour of earlier versions of this snippet).

    Args:
        text: The string to embed. It is tokenized as a batch of one and
            truncated to at most 512 tokens.
        pooling: ``"cls"`` to take the first token's hidden state, or
            ``"mean"`` to average token states via ``mean_pooling``.

    Returns:
        The pooled embedding after ``F.normalize(..., p=2, dim=1)``.

    Raises:
        ValueError: If ``pooling`` is neither ``"cls"`` nor ``"mean"``.
    """
    if pooling not in ("cls", "mean"):
        raise ValueError(f"pooling must be 'cls' or 'mean', got {pooling!r}")

    encoded_input = tokenizer(
        [text], padding=True, truncation=True, max_length=512, return_tensors='pt'
    )
    model_output = model(**encoded_input)

    if pooling == "cls":
        # Element 0 of the output holds the per-token hidden states; index 0
        # along the token axis is the leading special token.
        sentence_embeddings = model_output[0][:, 0]
    else:
        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    return F.normalize(sentence_embeddings, p=2, dim=1)
# from https://en.wikipedia.org/wiki/Artificial_intelligence
document_text = '''
Artificial intelligence (AI), in its broadest sense, is intelligence exhibited by machines, particularly computer systems.
It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their
environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]
Such machines may be called AIs.
'''

# Embed the document first, then the short query, and compare the two vectors.
document_embedding = embed(document_text)
query_embedding = embed('A text about technology')

# Left as a bare expression so a notebook cell displays the similarity matrix.
cosine_similarity(document_embedding, query_embedding)
Base model
BAAI/bge-m3