ONNX O4 version of BGE-RERANKER-V2-M3
# Score text pairs with the ONNX (O4) export of the reranker.
import torch
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenceClassification
from transformers import AutoTokenizer

# Each entry is a [text_a, text_b] pair that is scored jointly by the model.
# The strings are kept exactly as published (including typos) because the
# reference scores quoted below were produced from these exact inputs.
pairs = [
    ['Odio comer manzana.', 'I reallly like eating apple'],
    ['I reallly like eating apple', 'Realmente me gusta comer manzana.'],
    ['I reallly like eating apple', 'I hate apples'],
    ['Las manzanas son geniales.', 'Realmente me gusta comer manzana.'],
]

# Local directory (or hub id) holding the exported ONNX model and its tokenizer.
model_checkpoint = "onnxO4_bge_reranker_v2_m3"
ort_model = ORTModelForSequenceClassification.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# ONNX results
with torch.no_grad():
    # Tokenize all pairs as one padded batch, truncated to 512 tokens.
    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
    # Flatten the logits into a 1-D tensor of scores, one per input pair.
    scores = ort_model(**inputs, return_dict=True).logits.view(-1).float()
    print(scores)
## tensor([ -9.5081, -3.9569, -10.8632, 0.3756])
# Original model (plain PyTorch, not ONNX-converted), scored on the same
# `pairs` list defined in the ONNX example above, for comparison.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# NOTE: this rebinds `tokenizer` to the tokenizer of the original checkpoint.
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-v2-m3')
model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-v2-m3')
model.eval()  # switch to inference mode before scoring

with torch.no_grad():
    # Same tokenization settings as the ONNX run so the scores are comparable.
    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
    # Flatten the logits into a 1-D tensor of scores, one per input pair.
    scores = model(**inputs, return_dict=True).logits.view(-1).float()
    print(scores)
## tensor([ -9.4973, -3.9538, -10.8504, 0.3660])
- Downloads last month
- 59
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.