Edit model card

ONNX O4 Version of BGE-RERANKER-V2

pairs = [['Odio comer manzana.','I reallly like eating apple'],['I reallly like eating apple', 'Realmente me gusta comer manzana.'], ['I reallly like eating apple', 'I hate apples'],['Las manzanas son geniales.','Realmente me gusta comer manzana.']]

from optimum.onnxruntime import ORTModelForFeatureExtraction,ORTModelForSequenceClassification
from transformers import AutoTokenizer

model_checkpoint = "onnxO4_bge_reranker_v2_m3"

ort_model = ORTModelForSequenceClassification.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# ONNX Results
import torch


with torch.no_grad():
    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
    scores = ort_model(**inputs, return_dict=True).logits.view(-1, ).float()
    print(scores)

## tensor([ -9.5081,  -3.9569, -10.8632,   0.3756])

# Original non quantized

from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-v2-m3')
model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-v2-m3')
model.eval()

with torch.no_grad():
    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
    scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
    print(scores)

## tensor([ -9.4973,  -3.9538, -10.8504,   0.3660])
Downloads last month
59
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.