metadata

tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:234000
  - loss:MSELoss
base_model: google-bert/bert-base-multilingual-cased
widget:
  - source_sentence: who sings in spite of ourselves with john prine
    sentences:
      - es
      - når ble michael jordan draftet til nba
      - quien canta en spite of ourselves con john prine
  - source_sentence: who wrote when you look me in the eyes
    sentences:
      - متى بدأت الفتاة الكشفية في بيع ملفات تعريف الارتباط
      - A écrit when you look me in the eyes
      - fr
  - source_sentence: when was fathers day made a national holiday
    sentences:
      - wann wurde der Vatertag zum nationalen Feiertag
      - de
      - ' អ្នកណាច្រៀង i want to sing you a love song'
  - source_sentence: what is the density of the continental crust
    sentences:
      - cuál es la densidad de la corteza continental
      - wie zingt i want to sing you a love song
      - es
  - source_sentence: who wrote the song i shot the sheriff
    sentences:
      - Quel est l'âge légal pour consommer du vin au Canada?
      - i shot the sheriff şarkısını kim besteledi
      - tr
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - negative_mse
model-index:
  - name: SentenceTransformer based on google-bert/bert-base-multilingual-cased
    results:
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to ar
          type: MSE-val-en-to-ar
        metrics:
          - type: negative_mse
            value: -18.93259286880493
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to da
          type: MSE-val-en-to-da
        metrics:
          - type: negative_mse
            value: -15.68576693534851
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to de
          type: MSE-val-en-to-de
        metrics:
          - type: negative_mse
            value: -16.125640273094177
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to en
          type: MSE-val-en-to-en
        metrics:
          - type: negative_mse
            value: -13.388358056545258
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to es
          type: MSE-val-en-to-es
        metrics:
          - type: negative_mse
            value: -15.648126602172852
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to fi
          type: MSE-val-en-to-fi
        metrics:
          - type: negative_mse
            value: -17.174141108989716
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to fr
          type: MSE-val-en-to-fr
        metrics:
          - type: negative_mse
            value: -15.814268589019775
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to he
          type: MSE-val-en-to-he
        metrics:
          - type: negative_mse
            value: -18.483880162239075
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to hu
          type: MSE-val-en-to-hu
        metrics:
          - type: negative_mse
            value: -17.58536398410797
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to it
          type: MSE-val-en-to-it
        metrics:
          - type: negative_mse
            value: -15.706634521484375
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to ja
          type: MSE-val-en-to-ja
        metrics:
          - type: negative_mse
            value: -17.800691723823547
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to ko
          type: MSE-val-en-to-ko
        metrics:
          - type: negative_mse
            value: -19.26662176847458
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to km
          type: MSE-val-en-to-km
        metrics:
          - type: negative_mse
            value: -28.38749885559082
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to ms
          type: MSE-val-en-to-ms
        metrics:
          - type: negative_mse
            value: -15.783128142356873
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to nl
          type: MSE-val-en-to-nl
        metrics:
          - type: negative_mse
            value: -15.027229487895966
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to no
          type: MSE-val-en-to-no
        metrics:
          - type: negative_mse
            value: -15.598368644714355
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to pl
          type: MSE-val-en-to-pl
        metrics:
          - type: negative_mse
            value: -16.64138436317444
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to pt
          type: MSE-val-en-to-pt
        metrics:
          - type: negative_mse
            value: -15.76906442642212
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to ru
          type: MSE-val-en-to-ru
        metrics:
          - type: negative_mse
            value: -16.91163182258606
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to sv
          type: MSE-val-en-to-sv
        metrics:
          - type: negative_mse
            value: -15.555775165557861
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to th
          type: MSE-val-en-to-th
        metrics:
          - type: negative_mse
            value: -18.37025284767151
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to tr
          type: MSE-val-en-to-tr
        metrics:
          - type: negative_mse
            value: -16.945864260196686
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to vi
          type: MSE-val-en-to-vi
        metrics:
          - type: negative_mse
            value: -16.482776403427124
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to zh cn
          type: MSE-val-en-to-zh_cn
        metrics:
          - type: negative_mse
            value: -16.996394097805023
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to zh hk
          type: MSE-val-en-to-zh_hk
        metrics:
          - type: negative_mse
            value: -16.82070791721344
            name: Negative Mse
      - task:
          type: knowledge-distillation
          name: Knowledge Distillation
        dataset:
          name: MSE val en to zh tw
          type: MSE-val-en-to-zh_tw
        metrics:
          - type: negative_mse
            value: -17.381685972213745
            name: Negative Mse

SentenceTransformer based on google-bert/bert-base-multilingual-cased

This is a sentence-transformers model fine-tuned from google-bert/bert-base-multilingual-cased. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. It was trained by knowledge distillation with MSELoss on 234,000 samples; see Training Details below.

Model Details

Model Description

Model Type: Sentence Transformer
Base model: google-bert/bert-base-multilingual-cased
Maximum Sequence Length: 128 tokens
Output Dimensionality: 768 dimensions
Similarity Function: Cosine Similarity

Model Sources

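Documentation: Sentence Transformers Documentation (https://sbert.net)
Repository: Sentence Transformers on GitHub (https://github.com/UKPLab/sentence-transformers)
Hugging Face: Sentence Transformers on Hugging Face (https://huggingface.co/models?library=sentence-transformers)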

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("luanafelbarros/bert-base-multilingual-cased-matryoshka-mkqa")
# Run inference
sentences = [
    'who wrote the song i shot the sheriff',
    'i shot the sheriff şarkısını kim besteledi',
    'tr',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Knowledge Distillation

Datasets: MSE-val-en-to-ar, MSE-val-en-to-da, MSE-val-en-to-de, MSE-val-en-to-en, MSE-val-en-to-es, MSE-val-en-to-fi, MSE-val-en-to-fr, MSE-val-en-to-he, MSE-val-en-to-hu, MSE-val-en-to-it, MSE-val-en-to-ja, MSE-val-en-to-ko, MSE-val-en-to-km, MSE-val-en-to-ms, MSE-val-en-to-nl, MSE-val-en-to-no, MSE-val-en-to-pl, MSE-val-en-to-pt, MSE-val-en-to-ru, MSE-val-en-to-sv, MSE-val-en-to-th, MSE-val-en-to-tr, MSE-val-en-to-vi, MSE-val-en-to-zh_cn, MSE-val-en-to-zh_hk and MSE-val-en-to-zh_tw
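Evaluated with MSEEvaluator (https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.MSEEvaluator). The reported negative_mse is the negated mean squared error, so higher values (closer to zero) are better.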

Metric	MSE-val-en-to-ar	MSE-val-en-to-da	MSE-val-en-to-de	MSE-val-en-to-en	MSE-val-en-to-es	MSE-val-en-to-fi	MSE-val-en-to-fr	MSE-val-en-to-he	MSE-val-en-to-hu	MSE-val-en-to-it	MSE-val-en-to-ja	MSE-val-en-to-ko	MSE-val-en-to-km	MSE-val-en-to-ms	MSE-val-en-to-nl	MSE-val-en-to-no	MSE-val-en-to-pl	MSE-val-en-to-pt	MSE-val-en-to-ru	MSE-val-en-to-sv	MSE-val-en-to-th	MSE-val-en-to-tr	MSE-val-en-to-vi	MSE-val-en-to-zh_cn	MSE-val-en-to-zh_hk	MSE-val-en-to-zh_tw
negative_mse	-18.9326	-15.6858	-16.1256	-13.3884	-15.6481	-17.1741	-15.8143	-18.4839	-17.5854	-15.7066	-17.8007	-19.2666	-28.3875	-15.7831	-15.0272	-15.5984	-16.6414	-15.7691	-16.9116	-15.5558	-18.3703	-16.9459	-16.4828	-16.9964	-16.8207	-17.3817

Training Details

Training Dataset

Unnamed Dataset

Size: 234,000 training samples
Columns: english, non-english, target, and label

Approximate statistics based on the first 1000 samples:

	english	non-english	target	label
type	string	string	string	list
details	min: 10 tokens, mean: 12.34 tokens, max: 18 tokens	min: 3 tokens, mean: 14.41 tokens, max: 49 tokens	min: 3 tokens, mean: 3.38 tokens, max: 7 tokens	size: 768 elements

Samples:

english	non-english	target	label
`who plays hope on days of our lives`	`من الذي يلعب الأمل في أيام حياتنا`	`ar`	`[0.2171212136745453, 0.5138550996780396, 0.5517176389694214, -1.0655105113983154, 1.5853567123413086, ...]`
`who plays hope on days of our lives`	`hvem spiller hope i Horton-sagaen`	`da`	`[0.2171212136745453, 0.5138550996780396, 0.5517176389694214, -1.0655105113983154, 1.5853567123413086, ...]`
`who plays hope on days of our lives`	`Wer spielt die Hope in Zeit der Sehnsucht?`	`de`	`[0.2171212136745453, 0.5138550996780396, 0.5517176389694214, -1.0655105113983154, 1.5853567123413086, ...]`

Loss: MSELoss

Evaluation Dataset

Unnamed Dataset

Size: 13,000 evaluation samples
Columns: english, non-english, target, and label

Approximate statistics based on the first 1000 samples:

	english	non-english	target	label
type	string	string	string	list
details	min: 10 tokens, mean: 12.44 tokens, max: 16 tokens	min: 3 tokens, mean: 14.48 tokens, max: 49 tokens	min: 3 tokens, mean: 3.38 tokens, max: 7 tokens	size: 768 elements

Samples:

english	non-english	target	label
`who played prudence on nanny and the professor`	`من لعب دور "prudence" فى "nanny and the professor"`	`ar`	`[-0.2837616801261902, -0.4943353235721588, 0.020107418298721313, 0.7796109318733215, -0.47365888953208923, ...]`
`who played prudence on nanny and the professor`	`hvem spiller prudence på nanny and the professor`	`da`	`[-0.2837616801261902, -0.4943353235721588, 0.020107418298721313, 0.7796109318733215, -0.47365888953208923, ...]`
`who played prudence on nanny and the professor`	`Wer spielte Prudence in Nanny and the Professor`	`de`	`[-0.2837616801261902, -0.4943353235721588, 0.020107418298721313, 0.7796109318733215, -0.47365888953208923, ...]`

Loss: MSELoss

Training Hyperparameters

Non-Default Hyperparameters

eval_strategy: steps
per_device_train_batch_size: 64
per_device_eval_batch_size: 64
learning_rate: 1e-05
num_train_epochs: 4
warmup_ratio: 0.1
fp16: True

All Hyperparameters

Click to expand

overwrite_output_dir: False
do_predict: False
eval_strategy: steps
prediction_loss_only: True
per_device_train_batch_size: 64
per_device_eval_batch_size: 64
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 1
eval_accumulation_steps: None
torch_empty_cache_steps: None
learning_rate: 1e-05
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 4
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.1
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_nan_inf_filter: True
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: False
fp16: True
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: None
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
disable_tqdm: False
remove_unused_columns: True
label_names: None
load_best_model_at_end: False
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: False
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: False
resume_from_checkpoint: None
hub_model_id: None
hub_strategy: every_save
hub_private_repo: False
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
include_for_metrics: []
eval_do_concat_batches: True
fp16_backend: auto
push_to_hub_model_id: None
push_to_hub_organization: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
eval_on_start: False
use_liger_kernel: False
eval_use_gather_object: False
average_tokens_across_devices: False
prompts: None
batch_sampler: batch_sampler
multi_dataset_batch_sampler: proportional

Training Logs

Epoch	Step	Training Loss	Validation Loss	MSE-val-en-to-ar_negative_mse	MSE-val-en-to-da_negative_mse	MSE-val-en-to-de_negative_mse	MSE-val-en-to-en_negative_mse	MSE-val-en-to-es_negative_mse	MSE-val-en-to-fi_negative_mse	MSE-val-en-to-fr_negative_mse	MSE-val-en-to-he_negative_mse	MSE-val-en-to-hu_negative_mse	MSE-val-en-to-it_negative_mse	MSE-val-en-to-ja_negative_mse	MSE-val-en-to-ko_negative_mse	MSE-val-en-to-km_negative_mse	MSE-val-en-to-ms_negative_mse	MSE-val-en-to-nl_negative_mse	MSE-val-en-to-no_negative_mse	MSE-val-en-to-pl_negative_mse	MSE-val-en-to-pt_negative_mse	MSE-val-en-to-ru_negative_mse	MSE-val-en-to-sv_negative_mse	MSE-val-en-to-th_negative_mse	MSE-val-en-to-tr_negative_mse	MSE-val-en-to-vi_negative_mse	MSE-val-en-to-zh_cn_negative_mse	MSE-val-en-to-zh_hk_negative_mse	MSE-val-en-to-zh_tw_negative_mse
0.1367	500	0.3783	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
0.2734	1000	0.3256	0.3071	-30.0050	-29.7152	-29.7584	-29.5204	-29.6875	-29.9032	-29.6918	-29.9795	-29.9430	-29.7142	-29.8220	-30.0745	-32.1218	-29.8042	-29.7132	-29.7625	-29.7677	-29.6658	-29.8250	-29.8242	-30.1233	-29.8640	-29.7497	-29.6833	-29.7296	-29.7063
0.4102	1500	0.3007	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
0.5469	2000	0.2795	0.2663	-25.0193	-23.8364	-23.9924	-22.8145	-23.7158	-24.4490	-23.7719	-24.6885	-24.5973	-23.7662	-24.4998	-25.3625	-30.9153	-24.0474	-23.5674	-23.7934	-24.1332	-23.6279	-24.1308	-23.8860	-25.4166	-24.4840	-24.1931	-24.0816	-24.0634	-24.2529
0.6836	2500	0.2659	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
0.8203	3000	0.2562	0.2487	-22.9862	-21.2544	-21.4573	-19.8714	-21.1251	-22.1884	-21.1984	-22.6963	-22.3069	-21.1959	-22.3180	-23.4410	-30.2373	-21.4324	-20.8799	-21.1834	-21.7427	-21.1291	-21.7291	-21.3003	-23.2994	-22.1537	-21.7480	-21.7521	-21.6844	-21.9702
0.9571	3500	0.2475	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
1.0938	4000	0.2411	0.2375	-21.8220	-19.6064	-19.9128	-17.9872	-19.5372	-20.7666	-19.6563	-21.4985	-20.9295	-19.6182	-20.9963	-22.2441	-29.7291	-19.8001	-19.2003	-19.5189	-20.2697	-19.5946	-20.3160	-19.6652	-21.9553	-20.6678	-20.2305	-20.3719	-20.2700	-20.6528
1.2305	4500	0.2351	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
1.3672	5000	0.23	0.2296	-21.0058	-18.4861	-18.7926	-16.6395	-18.4034	-19.7517	-18.5299	-20.6663	-19.9769	-18.4977	-20.0496	-21.4171	-29.3272	-18.6213	-17.9746	-18.3449	-19.2392	-18.4960	-19.3377	-18.5079	-20.9805	-19.5803	-19.1385	-19.4256	-19.2708	-19.7140
1.5040	5500	0.2257	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
1.6407	6000	0.2222	0.2245	-20.4317	-17.7592	-18.1037	-15.7487	-17.6947	-19.0287	-17.8518	-20.1401	-19.3864	-17.7539	-19.4615	-20.8562	-29.1081	-17.8707	-17.1892	-17.6230	-18.5879	-17.7857	-18.7075	-17.7347	-20.2941	-18.8814	-18.4449	-18.8036	-18.6146	-19.1169
1.7774	6500	0.2186	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
1.9141	7000	0.2158	0.2199	-19.9961	-17.0956	-17.4488	-14.9930	-17.0238	-18.4442	-17.1720	-19.6005	-18.7765	-17.1020	-18.8972	-20.3720	-28.8656	-17.1949	-16.4824	-16.9655	-17.9687	-17.1229	-18.0911	-17.0128	-19.6600	-18.2823	-17.8109	-18.2341	-18.0582	-18.5735
2.0509	7500	0.2135	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
2.1876	8000	0.2109	0.2167	-19.6376	-16.6362	-17.0307	-14.4461	-16.5766	-18.0419	-16.7080	-19.2403	-18.3971	-16.6443	-18.5251	-20.0263	-28.7414	-16.7279	-15.9992	-16.5092	-17.5170	-16.6766	-17.7151	-16.5403	-19.2861	-17.8316	-17.3764	-17.8453	-17.6606	-18.1844
2.3243	8500	0.2088	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
2.4610	9000	0.2074	0.2149	-19.4358	-16.3728	-16.7740	-14.1447	-16.3289	-17.8191	-16.4582	-19.0369	-18.1738	-16.3903	-18.3565	-19.8207	-28.6133	-16.4804	-15.7354	-16.2673	-17.3034	-16.4190	-17.4826	-16.2566	-18.9971	-17.5950	-17.1273	-17.6066	-17.4124	-17.9799
2.5978	9500	0.2059	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
2.7345	10000	0.2047	0.2134	-19.2764	-16.1718	-16.5449	-13.8928	-16.1098	-17.5866	-16.2421	-18.8665	-17.9798	-16.1538	-18.1695	-19.6218	-28.5605	-16.2479	-15.4962	-16.0522	-17.0797	-16.2106	-17.3130	-16.0278	-18.8206	-17.3910	-16.9231	-17.4203	-17.2266	-17.7903
2.8712	10500	0.2033	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
3.0079	11000	0.2024	0.2120	-19.1026	-15.9149	-16.3497	-13.6750	-15.8828	-17.3842	-16.0397	-18.6612	-17.7796	-15.9436	-17.9779	-19.4370	-28.4678	-16.0245	-15.2818	-15.8265	-16.8594	-15.9988	-17.1163	-15.8106	-18.5870	-17.1548	-16.7074	-17.2082	-17.0233	-17.5910
3.1447	11500	0.201	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
3.2814	12000	0.2004	0.2112	-19.0406	-15.8196	-16.2516	-13.5420	-15.7688	-17.2734	-15.9280	-18.5894	-17.6966	-15.8265	-17.8933	-19.3785	-28.4539	-15.9129	-15.1631	-15.7175	-16.7540	-15.8974	-17.0251	-15.6875	-18.4807	-17.0615	-16.6087	-17.1051	-16.9423	-17.4923
3.4181	12500	0.1997	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
3.5548	13000	0.1995	0.2108	-18.9779	-15.7524	-16.1996	-13.4723	-15.7211	-17.2272	-15.8790	-18.5412	-17.6416	-15.7862	-17.8502	-19.3124	-28.4179	-15.8513	-15.1030	-15.6645	-16.7053	-15.8355	-16.9742	-15.6246	-18.4384	-17.0053	-16.5478	-17.0674	-16.8851	-17.4527
3.6916	13500	0.1991	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
3.8283	14000	0.1987	0.2103	-18.9326	-15.6858	-16.1256	-13.3884	-15.6481	-17.1741	-15.8143	-18.4839	-17.5854	-15.7066	-17.8007	-19.2666	-28.3875	-15.7831	-15.0272	-15.5984	-16.6414	-15.7691	-16.9116	-15.5558	-18.3703	-16.9459	-16.4828	-16.9964	-16.8207	-17.3817
3.9650	14500	0.1989	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-

Framework Versions

Python: 3.10.12
Sentence Transformers: 3.3.1
Transformers: 4.46.3
PyTorch: 2.5.1+cu121
Accelerate: 1.1.1
Datasets: 3.1.0
Tokenizers: 0.20.3

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MSELoss

@inproceedings{reimers-2020-multilingual-sentence-bert,
    title = "Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2020",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/2004.09813",
}