metadata
base_model: sdadas/mmlw-roberta-base
language:
- pl
library_name: sentence-transformers
license: apache-2.0
metrics:
- cosine_accuracy@1
- cosine_accuracy@3
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@3
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@3
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@10
- cosine_mrr@10
- cosine_map@100
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- dataset_size:1K<n<10K
- loss:MatryoshkaLoss
- loss:MultipleNegativesRankingLoss
widget:
- source_sentence: Żywot św. Stanisława
sentences:
- czym różni się Żywot św. Stanisława od Legendy św. Stanisława?
- w którym kraju w noc sylwestrową je się oliebollen?
- Pierwsze bloki mieszkalne powstały pod koniec lat 80.
- source_sentence: Herkules na rozstajach
sentences:
- jak zinterpretować wymowę obrazu Herkules na rozstajach?
- gdzie zginął przedwojenny minister Antoni Olszewski?
- kiedy konsekrowano katedrę św. Teresy z Avili w Požedze?
- source_sentence: gdzie rośnie bokkonia?
sentences:
- gdzie występuje rogownica szerokolistna?
- Ochrzcił w sierpniu 1982 ich syna księcia Wilhelma.
- Pośmiertnie został odznaczony Krzyżem Virtuti Militari.
- source_sentence: czym jest Kompas Sztuki?
sentences:
- ' Projekt Kompas Sztuki: Galeria m2 (m kwadrat).'
- 'Do rodzaju Caraipa zaliczanych jest ok. 55 gatunków:'
- kto jest aktualnym rekordzistą Chorwacji w skoku w dal?
- source_sentence: Dalsze losy relikwii
sentences:
- Losy relikwii świętego
- czemu gra The Saboteur wywołała wiele kontrowersji?
- >-
kto jest pierwszym rosyjskim kierowcą wyścigowym startującym w Formule
1?
model-index:
- name: mmlw-roberta-base-klej-dyk-v0.1
results:
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 768
type: dim_768
metrics:
- type: cosine_accuracy@1
value: 0.18990384615384615
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.5865384615384616
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.7692307692307693
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.8533653846153846
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.18990384615384615
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.1955128205128205
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.15384615384615383
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.08533653846153846
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.18990384615384615
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.5865384615384616
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.7692307692307693
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.8533653846153846
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.5204892782178483
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.4127814026251526
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.418150211843158
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 512
type: dim_512
metrics:
- type: cosine_accuracy@1
value: 0.1875
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.5889423076923077
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.7596153846153846
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.8629807692307693
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.1875
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.19631410256410253
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.15192307692307688
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.08629807692307694
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.1875
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.5889423076923077
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.7596153846153846
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.8629807692307693
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.5204340563935984
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.4100885225885227
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.4147514658961434
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 256
type: dim_256
metrics:
- type: cosine_accuracy@1
value: 0.19471153846153846
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.5649038461538461
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.7451923076923077
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.8461538461538461
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.19471153846153846
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.18830128205128205
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.1490384615384615
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.08461538461538462
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.19471153846153846
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.5649038461538461
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.7451923076923077
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.8461538461538461
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.5144907264607753
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.4078373015873016
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.413093644747221
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 128
type: dim_128
metrics:
- type: cosine_accuracy@1
value: 0.18269230769230768
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.5192307692307693
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.7163461538461539
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.8293269230769231
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.18269230769230768
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.17307692307692307
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.14326923076923076
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.08293269230769229
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.18269230769230768
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.5192307692307693
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.7163461538461539
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.8293269230769231
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.4955346842225082
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.38889652014651993
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.39396452853345754
name: Cosine Map@100
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: dim 64
type: dim_64
metrics:
- type: cosine_accuracy@1
value: 0.1778846153846154
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.4831730769230769
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.6514423076923077
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.7740384615384616
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.1778846153846154
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.16105769230769232
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.13028846153846152
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.07740384615384614
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.1778846153846154
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.4831730769230769
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.6514423076923077
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.7740384615384616
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.4639263641936578
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.36540083180708166
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.3728380879103276
name: Cosine Map@100
mmlw-roberta-base-klej-dyk-v0.1
This is a sentence-transformers model finetuned from sdadas/mmlw-roberta-base. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
- Model Type: Sentence Transformer
- Base model: sdadas/mmlw-roberta-base
- Maximum Sequence Length: 512 tokens
- Output Dimensionality: 768 dimensions
- Similarity Function: Cosine Similarity
- Language: pl
- License: apache-2.0
Model Sources
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("sentence_transformers_model_id")
sentences = [
'Dalsze losy relikwii',
'Losy relikwii świętego',
'czemu gra The Saboteur wywołała wiele kontrowersji?',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
Evaluation
Metrics
Information Retrieval (dataset: dim_768)
Metric |
Value |
cosine_accuracy@1 |
0.1899 |
cosine_accuracy@3 |
0.5865 |
cosine_accuracy@5 |
0.7692 |
cosine_accuracy@10 |
0.8534 |
cosine_precision@1 |
0.1899 |
cosine_precision@3 |
0.1955 |
cosine_precision@5 |
0.1538 |
cosine_precision@10 |
0.0853 |
cosine_recall@1 |
0.1899 |
cosine_recall@3 |
0.5865 |
cosine_recall@5 |
0.7692 |
cosine_recall@10 |
0.8534 |
cosine_ndcg@10 |
0.5205 |
cosine_mrr@10 |
0.4128 |
cosine_map@100 |
0.4182 |
Information Retrieval (dataset: dim_512)
Metric |
Value |
cosine_accuracy@1 |
0.1875 |
cosine_accuracy@3 |
0.5889 |
cosine_accuracy@5 |
0.7596 |
cosine_accuracy@10 |
0.863 |
cosine_precision@1 |
0.1875 |
cosine_precision@3 |
0.1963 |
cosine_precision@5 |
0.1519 |
cosine_precision@10 |
0.0863 |
cosine_recall@1 |
0.1875 |
cosine_recall@3 |
0.5889 |
cosine_recall@5 |
0.7596 |
cosine_recall@10 |
0.863 |
cosine_ndcg@10 |
0.5204 |
cosine_mrr@10 |
0.4101 |
cosine_map@100 |
0.4148 |
Information Retrieval (dataset: dim_256)
Metric |
Value |
cosine_accuracy@1 |
0.1947 |
cosine_accuracy@3 |
0.5649 |
cosine_accuracy@5 |
0.7452 |
cosine_accuracy@10 |
0.8462 |
cosine_precision@1 |
0.1947 |
cosine_precision@3 |
0.1883 |
cosine_precision@5 |
0.149 |
cosine_precision@10 |
0.0846 |
cosine_recall@1 |
0.1947 |
cosine_recall@3 |
0.5649 |
cosine_recall@5 |
0.7452 |
cosine_recall@10 |
0.8462 |
cosine_ndcg@10 |
0.5145 |
cosine_mrr@10 |
0.4078 |
cosine_map@100 |
0.4131 |
Information Retrieval (dataset: dim_128)
Metric |
Value |
cosine_accuracy@1 |
0.1827 |
cosine_accuracy@3 |
0.5192 |
cosine_accuracy@5 |
0.7163 |
cosine_accuracy@10 |
0.8293 |
cosine_precision@1 |
0.1827 |
cosine_precision@3 |
0.1731 |
cosine_precision@5 |
0.1433 |
cosine_precision@10 |
0.0829 |
cosine_recall@1 |
0.1827 |
cosine_recall@3 |
0.5192 |
cosine_recall@5 |
0.7163 |
cosine_recall@10 |
0.8293 |
cosine_ndcg@10 |
0.4955 |
cosine_mrr@10 |
0.3889 |
cosine_map@100 |
0.394 |
Information Retrieval (dataset: dim_64)
Metric |
Value |
cosine_accuracy@1 |
0.1779 |
cosine_accuracy@3 |
0.4832 |
cosine_accuracy@5 |
0.6514 |
cosine_accuracy@10 |
0.774 |
cosine_precision@1 |
0.1779 |
cosine_precision@3 |
0.1611 |
cosine_precision@5 |
0.1303 |
cosine_precision@10 |
0.0774 |
cosine_recall@1 |
0.1779 |
cosine_recall@3 |
0.4832 |
cosine_recall@5 |
0.6514 |
cosine_recall@10 |
0.774 |
cosine_ndcg@10 |
0.4639 |
cosine_mrr@10 |
0.3654 |
cosine_map@100 |
0.3728 |
Training Details
Training Dataset
Unnamed Dataset
- Size: 3,738 training samples
- Columns:
positive
and anchor
- Approximate statistics based on the first 1000 samples:
|
positive |
anchor |
type |
string |
string |
details |
- min: 5 tokens
- mean: 50.1 tokens
- max: 466 tokens
|
- min: 6 tokens
- mean: 16.62 tokens
- max: 49 tokens
|
- Samples:
positive |
anchor |
Zespół Blaua (zespół Jabsa, ang. Blau syndrome, BS) – rzadka choroba genetyczna o dziedziczeniu autosomalnym dominującym, charakteryzująca się ziarniniakowym zapaleniem stawów o wczesnym początku, zapaleniem jagodówki (uveitis) i wysypką skórną, a także kamptodaktylią. |
jakie choroby genetyczne dziedziczą się autosomalnie dominująco? |
Gorgippia Gorgippia – starożytne miasto bosporańskie nad Morzem Czarnym, którego pozostałości znajdują się obecnie pod współczesną zabudową centralnej części miasta Anapa w Kraju Krasnodarskim w Rosji. |
gdzie obecnie znajduje się starożytne miasto Gorgippia? |
Ulubionym dystansem Rücker było 400 metrów i to na nim notowała największe indywidualne sukcesy : srebrny medal Mistrzostw Europy juniorów w lekkoatletyce (Saloniki 1991) 6. miejsce w Pucharze Świata w Lekkoatletyce (Hawana 1992) 5. miejsce na Mistrzostwach Europy w Lekkoatletyce (Helsinki 1994) srebro podczas Mistrzostw Świata w Lekkoatletyce (Sewilla 1999) złota medalistka mistrzostw Niemiec Duże sukcesy odnosiła także w sztafecie 4 x 400 metrów : złoto Mistrzostw Europy juniorów w lekkoatletyce (Varaždin 1989) złoty medal Mistrzostw Europy juniorów w lekkoatletyce (Saloniki 1991) brąz na Mistrzostwach Europy w Lekkoatletyce (Helsinki 1994) brązowy medal podczas Igrzysk Olimpijskich (Atlanta 1996) brąz na Halowych Mistrzostwach Świata w Lekkoatletyce (Paryż 1997) złoto Mistrzostw Świata w Lekkoatletyce (Ateny 1997) brązowy medal Mistrzostw Świata w Lekkoatletyce (Sewilla 1999) |
kto zaprojektował medale, które będą wręczane podczas tegorocznych mistrzostw Europy juniorów w lekkoatletyce? |
- Loss:
MatryoshkaLoss
with these parameters:{
"loss": "MultipleNegativesRankingLoss",
"matryoshka_dims": [
768,
512,
256,
128,
64
],
"matryoshka_weights": [
1,
1,
1,
1,
1
],
"n_dims_per_step": -1
}
Training Hyperparameters
Non-Default Hyperparameters
eval_strategy
: epoch
gradient_accumulation_steps
: 8
learning_rate
: 2e-05
num_train_epochs
: 5
lr_scheduler_type
: cosine
warmup_ratio
: 0.1
bf16
: True
tf32
: True
load_best_model_at_end
: True
optim
: adamw_torch_fused
batch_sampler
: no_duplicates
All Hyperparameters
Click to expand
overwrite_output_dir
: False
do_predict
: False
eval_strategy
: epoch
prediction_loss_only
: True
per_device_train_batch_size
: 8
per_device_eval_batch_size
: 8
per_gpu_train_batch_size
: None
per_gpu_eval_batch_size
: None
gradient_accumulation_steps
: 8
eval_accumulation_steps
: None
learning_rate
: 2e-05
weight_decay
: 0.0
adam_beta1
: 0.9
adam_beta2
: 0.999
adam_epsilon
: 1e-08
max_grad_norm
: 1.0
num_train_epochs
: 5
max_steps
: -1
lr_scheduler_type
: cosine
lr_scheduler_kwargs
: {}
warmup_ratio
: 0.1
warmup_steps
: 0
log_level
: passive
log_level_replica
: warning
log_on_each_node
: True
logging_nan_inf_filter
: True
save_safetensors
: True
save_on_each_node
: False
save_only_model
: False
restore_callback_states_from_checkpoint
: False
no_cuda
: False
use_cpu
: False
use_mps_device
: False
seed
: 42
data_seed
: None
jit_mode_eval
: False
use_ipex
: False
bf16
: True
fp16
: False
fp16_opt_level
: O1
half_precision_backend
: auto
bf16_full_eval
: False
fp16_full_eval
: False
tf32
: True
local_rank
: 0
ddp_backend
: None
tpu_num_cores
: None
tpu_metrics_debug
: False
debug
: []
dataloader_drop_last
: False
dataloader_num_workers
: 0
dataloader_prefetch_factor
: None
past_index
: -1
disable_tqdm
: False
remove_unused_columns
: True
label_names
: None
load_best_model_at_end
: True
ignore_data_skip
: False
fsdp
: []
fsdp_min_num_params
: 0
fsdp_config
: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap
: None
accelerator_config
: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed
: None
label_smoothing_factor
: 0.0
optim
: adamw_torch_fused
optim_args
: None
adafactor
: False
group_by_length
: False
length_column_name
: length
ddp_find_unused_parameters
: None
ddp_bucket_cap_mb
: None
ddp_broadcast_buffers
: False
dataloader_pin_memory
: True
dataloader_persistent_workers
: False
skip_memory_metrics
: True
use_legacy_prediction_loop
: False
push_to_hub
: False
resume_from_checkpoint
: None
hub_model_id
: None
hub_strategy
: every_save
hub_private_repo
: False
hub_always_push
: False
gradient_checkpointing
: False
gradient_checkpointing_kwargs
: None
include_inputs_for_metrics
: False
eval_do_concat_batches
: True
fp16_backend
: auto
push_to_hub_model_id
: None
push_to_hub_organization
: None
mp_parameters
:
auto_find_batch_size
: False
full_determinism
: False
torchdynamo
: None
ray_scope
: last
ddp_timeout
: 1800
torch_compile
: False
torch_compile_backend
: None
torch_compile_mode
: None
dispatch_batches
: None
split_batches
: None
include_tokens_per_second
: False
include_num_input_tokens_seen
: False
neftune_noise_alpha
: None
optim_target_modules
: None
batch_eval_metrics
: False
batch_sampler
: no_duplicates
multi_dataset_batch_sampler
: proportional
Training Logs
Click to expand
Epoch |
Step |
Training Loss |
dim_128_cosine_map@100 |
dim_256_cosine_map@100 |
dim_512_cosine_map@100 |
dim_64_cosine_map@100 |
dim_768_cosine_map@100 |
0 |
0 |
- |
0.3475 |
0.3675 |
0.3753 |
0.2982 |
0.3798 |
0.0171 |
1 |
2.6683 |
- |
- |
- |
- |
- |
0.0342 |
2 |
3.2596 |
- |
- |
- |
- |
- |
0.0513 |
3 |
3.4541 |
- |
- |
- |
- |
- |
0.0684 |
4 |
2.4201 |
- |
- |
- |
- |
- |
0.0855 |
5 |
3.5911 |
- |
- |
- |
- |
- |
0.1026 |
6 |
3.0902 |
- |
- |
- |
- |
- |
0.1197 |
7 |
2.5999 |
- |
- |
- |
- |
- |
0.1368 |
8 |
2.892 |
- |
- |
- |
- |
- |
0.1538 |
9 |
2.8722 |
- |
- |
- |
- |
- |
0.1709 |
10 |
2.3703 |
- |
- |
- |
- |
- |
0.1880 |
11 |
2.6833 |
- |
- |
- |
- |
- |
0.2051 |
12 |
1.9814 |
- |
- |
- |
- |
- |
0.2222 |
13 |
1.6643 |
- |
- |
- |
- |
- |
0.2393 |
14 |
1.8493 |
- |
- |
- |
- |
- |
0.2564 |
15 |
1.5136 |
- |
- |
- |
- |
- |
0.2735 |
16 |
1.9726 |
- |
- |
- |
- |
- |
0.2906 |
17 |
1.1505 |
- |
- |
- |
- |
- |
0.3077 |
18 |
1.3834 |
- |
- |
- |
- |
- |
0.3248 |
19 |
1.2244 |
- |
- |
- |
- |
- |
0.3419 |
20 |
1.2107 |
- |
- |
- |
- |
- |
0.3590 |
21 |
0.8936 |
- |
- |
- |
- |
- |
0.3761 |
22 |
0.8144 |
- |
- |
- |
- |
- |
0.3932 |
23 |
0.8353 |
- |
- |
- |
- |
- |
0.4103 |
24 |
1.572 |
- |
- |
- |
- |
- |
0.4274 |
25 |
0.9257 |
- |
- |
- |
- |
- |
0.4444 |
26 |
0.8405 |
- |
- |
- |
- |
- |
0.4615 |
27 |
0.5621 |
- |
- |
- |
- |
- |
0.4786 |
28 |
0.4241 |
- |
- |
- |
- |
- |
0.4957 |
29 |
0.6171 |
- |
- |
- |
- |
- |
0.5128 |
30 |
0.5989 |
- |
- |
- |
- |
- |
0.5299 |
31 |
0.2767 |
- |
- |
- |
- |
- |
0.5470 |
32 |
0.5599 |
- |
- |
- |
- |
- |
0.5641 |
33 |
0.5964 |
- |
- |
- |
- |
- |
0.5812 |
34 |
0.9778 |
- |
- |
- |
- |
- |
0.5983 |
35 |
0.772 |
- |
- |
- |
- |
- |
0.6154 |
36 |
1.0341 |
- |
- |
- |
- |
- |
0.6325 |
37 |
0.3503 |
- |
- |
- |
- |
- |
0.6496 |
38 |
0.8229 |
- |
- |
- |
- |
- |
0.6667 |
39 |
0.969 |
- |
- |
- |
- |
- |
0.6838 |
40 |
1.7993 |
- |
- |
- |
- |
- |
0.7009 |
41 |
0.5542 |
- |
- |
- |
- |
- |
0.7179 |
42 |
1.332 |
- |
- |
- |
- |
- |
0.7350 |
43 |
1.1516 |
- |
- |
- |
- |
- |
0.7521 |
44 |
1.3183 |
- |
- |
- |
- |
- |
0.7692 |
45 |
1.0865 |
- |
- |
- |
- |
- |
0.7863 |
46 |
0.6204 |
- |
- |
- |
- |
- |
0.8034 |
47 |
0.7541 |
- |
- |
- |
- |
- |
0.8205 |
48 |
0.9362 |
- |
- |
- |
- |
- |
0.8376 |
49 |
0.3979 |
- |
- |
- |
- |
- |
0.8547 |
50 |
0.7187 |
- |
- |
- |
- |
- |
0.8718 |
51 |
0.9217 |
- |
- |
- |
- |
- |
0.8889 |
52 |
0.4866 |
- |
- |
- |
- |
- |
0.9060 |
53 |
0.355 |
- |
- |
- |
- |
- |
0.9231 |
54 |
0.7172 |
- |
- |
- |
- |
- |
0.9402 |
55 |
0.6007 |
- |
- |
- |
- |
- |
0.9573 |
56 |
1.1547 |
- |
- |
- |
- |
- |
0.9744 |
57 |
0.5713 |
- |
- |
- |
- |
- |
0.9915 |
58 |
0.9089 |
0.3985 |
0.4164 |
0.4264 |
0.3642 |
0.4255 |
1.0085 |
59 |
0.594 |
- |
- |
- |
- |
- |
1.0256 |
60 |
0.6554 |
- |
- |
- |
- |
- |
1.0427 |
61 |
0.2794 |
- |
- |
- |
- |
- |
1.0598 |
62 |
0.8654 |
- |
- |
- |
- |
- |
1.0769 |
63 |
0.9698 |
- |
- |
- |
- |
- |
1.0940 |
64 |
1.4827 |
- |
- |
- |
- |
- |
1.1111 |
65 |
0.3159 |
- |
- |
- |
- |
- |
1.1282 |
66 |
0.255 |
- |
- |
- |
- |
- |
1.1453 |
67 |
0.9819 |
- |
- |
- |
- |
- |
1.1624 |
68 |
0.7442 |
- |
- |
- |
- |
- |
1.1795 |
69 |
0.8199 |
- |
- |
- |
- |
- |
1.1966 |
70 |
0.2647 |
- |
- |
- |
- |
- |
1.2137 |
71 |
0.4098 |
- |
- |
- |
- |
- |
1.2308 |
72 |
0.1608 |
- |
- |
- |
- |
- |
1.2479 |
73 |
0.2092 |
- |
- |
- |
- |
- |
1.2650 |
74 |
0.1231 |
- |
- |
- |
- |
- |
1.2821 |
75 |
0.3203 |
- |
- |
- |
- |
- |
1.2991 |
76 |
0.1435 |
- |
- |
- |
- |
- |
1.3162 |
77 |
0.2293 |
- |
- |
- |
- |
- |
1.3333 |
78 |
0.131 |
- |
- |
- |
- |
- |
1.3504 |
79 |
0.1662 |
- |
- |
- |
- |
- |
1.3675 |
80 |
0.094 |
- |
- |
- |
- |
- |
1.3846 |
81 |
0.1454 |
- |
- |
- |
- |
- |
1.4017 |
82 |
0.3096 |
- |
- |
- |
- |
- |
1.4188 |
83 |
0.3188 |
- |
- |
- |
- |
- |
1.4359 |
84 |
0.1156 |
- |
- |
- |
- |
- |
1.4530 |
85 |
0.0581 |
- |
- |
- |
- |
- |
1.4701 |
86 |
0.0543 |
- |
- |
- |
- |
- |
1.4872 |
87 |
0.0427 |
- |
- |
- |
- |
- |
1.5043 |
88 |
0.07 |
- |
- |
- |
- |
- |
1.5214 |
89 |
0.0451 |
- |
- |
- |
- |
- |
1.5385 |
90 |
0.0646 |
- |
- |
- |
- |
- |
1.5556 |
91 |
0.1152 |
- |
- |
- |
- |
- |
1.5726 |
92 |
0.1292 |
- |
- |
- |
- |
- |
1.5897 |
93 |
0.1591 |
- |
- |
- |
- |
- |
1.6068 |
94 |
0.1194 |
- |
- |
- |
- |
- |
1.6239 |
95 |
0.0876 |
- |
- |
- |
- |
- |
1.6410 |
96 |
0.1018 |
- |
- |
- |
- |
- |
1.6581 |
97 |
0.3309 |
- |
- |
- |
- |
- |
1.6752 |
98 |
0.2214 |
- |
- |
- |
- |
- |
1.6923 |
99 |
0.1536 |
- |
- |
- |
- |
- |
1.7094 |
100 |
0.1543 |
- |
- |
- |
- |
- |
1.7265 |
101 |
0.3663 |
- |
- |
- |
- |
- |
1.7436 |
102 |
0.2719 |
- |
- |
- |
- |
- |
1.7607 |
103 |
0.1379 |
- |
- |
- |
- |
- |
1.7778 |
104 |
0.0479 |
- |
- |
- |
- |
- |
1.7949 |
105 |
0.0757 |
- |
- |
- |
- |
- |
1.8120 |
106 |
0.059 |
- |
- |
- |
- |
- |
1.8291 |
107 |
0.119 |
- |
- |
- |
- |
- |
1.8462 |
108 |
0.1295 |
- |
- |
- |
- |
- |
1.8632 |
109 |
0.115 |
- |
- |
- |
- |
- |
1.8803 |
110 |
0.142 |
- |
- |
- |
- |
- |
1.8974 |
111 |
0.1064 |
- |
- |
- |
- |
- |
1.9145 |
112 |
0.0959 |
- |
- |
- |
- |
- |
1.9316 |
113 |
0.0839 |
- |
- |
- |
- |
- |
1.9487 |
114 |
0.1762 |
- |
- |
- |
- |
- |
1.9658 |
115 |
0.1986 |
- |
- |
- |
- |
- |
1.9829 |
116 |
0.0599 |
- |
- |
- |
- |
- |
2.0 |
117 |
0.1145 |
0.3869 |
0.4095 |
0.4135 |
0.3664 |
0.4195 |
2.0171 |
118 |
0.0815 |
- |
- |
- |
- |
- |
2.0342 |
119 |
0.1052 |
- |
- |
- |
- |
- |
2.0513 |
120 |
0.1348 |
- |
- |
- |
- |
- |
2.0684 |
121 |
0.255 |
- |
- |
- |
- |
- |
2.0855 |
122 |
0.251 |
- |
- |
- |
- |
- |
2.1026 |
123 |
0.3033 |
- |
- |
- |
- |
- |
2.1197 |
124 |
0.0385 |
- |
- |
- |
- |
- |
2.1368 |
125 |
0.0687 |
- |
- |
- |
- |
- |
2.1538 |
126 |
0.1682 |
- |
- |
- |
- |
- |
2.1709 |
127 |
0.0774 |
- |
- |
- |
- |
- |
2.1880 |
128 |
0.0944 |
- |
- |
- |
- |
- |
2.2051 |
129 |
0.036 |
- |
- |
- |
- |
- |
2.2222 |
130 |
0.0393 |
- |
- |
- |
- |
- |
2.2393 |
131 |
0.0387 |
- |
- |
- |
- |
- |
2.2564 |
132 |
0.0273 |
- |
- |
- |
- |
- |
2.2735 |
133 |
0.056 |
- |
- |
- |
- |
- |
2.2906 |
134 |
0.0279 |
- |
- |
- |
- |
- |
2.3077 |
135 |
0.0557 |
- |
- |
- |
- |
- |
2.3248 |
136 |
0.0197 |
- |
- |
- |
- |
- |
2.3419 |
137 |
0.0216 |
- |
- |
- |
- |
- |
2.3590 |
138 |
0.0212 |
- |
- |
- |
- |
- |
2.3761 |
139 |
0.0239 |
- |
- |
- |
- |
- |
2.3932 |
140 |
0.0526 |
- |
- |
- |
- |
- |
2.4103 |
141 |
0.1072 |
- |
- |
- |
- |
- |
2.4274 |
142 |
0.0347 |
- |
- |
- |
- |
- |
2.4444 |
143 |
0.024 |
- |
- |
- |
- |
- |
2.4615 |
144 |
0.0128 |
- |
- |
- |
- |
- |
2.4786 |
145 |
0.0089 |
- |
- |
- |
- |
- |
2.4957 |
146 |
0.0101 |
- |
- |
- |
- |
- |
2.5128 |
147 |
0.0124 |
- |
- |
- |
- |
- |
2.5299 |
148 |
0.011 |
- |
- |
- |
- |
- |
2.5470 |
149 |
0.0182 |
- |
- |
- |
- |
- |
2.5641 |
150 |
0.0379 |
- |
- |
- |
- |
- |
2.5812 |
151 |
0.0395 |
- |
- |
- |
- |
- |
2.5983 |
152 |
0.0372 |
- |
- |
- |
- |
- |
2.6154 |
153 |
0.031 |
- |
- |
- |
- |
- |
2.6325 |
154 |
0.0136 |
- |
- |
- |
- |
- |
2.6496 |
155 |
0.0355 |
- |
- |
- |
- |
- |
2.6667 |
156 |
0.0296 |
- |
- |
- |
- |
- |
2.6838 |
157 |
0.0473 |
- |
- |
- |
- |
- |
2.7009 |
158 |
0.0295 |
- |
- |
- |
- |
- |
2.7179 |
159 |
0.0576 |
- |
- |
- |
- |
- |
2.7350 |
160 |
0.0592 |
- |
- |
- |
- |
- |
2.7521 |
161 |
0.0571 |
- |
- |
- |
- |
- |
2.7692 |
162 |
0.0221 |
- |
- |
- |
- |
- |
2.7863 |
163 |
0.0179 |
- |
- |
- |
- |
- |
2.8034 |
164 |
0.0195 |
- |
- |
- |
- |
- |
2.8205 |
165 |
0.0291 |
- |
- |
- |
- |
- |
2.8376 |
166 |
0.024 |
- |
- |
- |
- |
- |
2.8547 |
167 |
0.0396 |
- |
- |
- |
- |
- |
2.8718 |
168 |
0.0352 |
- |
- |
- |
- |
- |
2.8889 |
169 |
0.0431 |
- |
- |
- |
- |
- |
2.9060 |
170 |
0.0222 |
- |
- |
- |
- |
- |
2.9231 |
171 |
0.016 |
- |
- |
- |
- |
- |
2.9402 |
172 |
0.0307 |
- |
- |
- |
- |
- |
2.9573 |
173 |
0.0439 |
- |
- |
- |
- |
- |
2.9744 |
174 |
0.0197 |
- |
- |
- |
- |
- |
2.9915 |
175 |
0.0181 |
0.3928 |
0.4120 |
0.4152 |
0.3717 |
0.4180 |
3.0085 |
176 |
0.03 |
- |
- |
- |
- |
- |
3.0256 |
177 |
0.0325 |
- |
- |
- |
- |
- |
3.0427 |
178 |
0.0286 |
- |
- |
- |
- |
- |
3.0598 |
179 |
0.0746 |
- |
- |
- |
- |
- |
3.0769 |
180 |
0.0677 |
- |
- |
- |
- |
- |
3.0940 |
181 |
0.0574 |
- |
- |
- |
- |
- |
3.1111 |
182 |
0.0158 |
- |
- |
- |
- |
- |
3.1282 |
183 |
0.0092 |
- |
- |
- |
- |
- |
3.1453 |
184 |
0.0412 |
- |
- |
- |
- |
- |
3.1624 |
185 |
0.0308 |
- |
- |
- |
- |
- |
3.1795 |
186 |
0.022 |
- |
- |
- |
- |
- |
3.1966 |
187 |
0.0157 |
- |
- |
- |
- |
- |
3.2137 |
188 |
0.0109 |
- |
- |
- |
- |
- |
3.2308 |
189 |
0.0059 |
- |
- |
- |
- |
- |
3.2479 |
190 |
0.0206 |
- |
- |
- |
- |
- |
3.2650 |
191 |
0.0135 |
- |
- |
- |
- |
- |
3.2821 |
192 |
0.0199 |
- |
- |
- |
- |
- |
3.2991 |
193 |
0.0124 |
- |
- |
- |
- |
- |
3.3162 |
194 |
0.0081 |
- |
- |
- |
- |
- |
3.3333 |
195 |
0.0052 |
- |
- |
- |
- |
- |
3.3504 |
196 |
0.006 |
- |
- |
- |
- |
- |
3.3675 |
197 |
0.0074 |
- |
- |
- |
- |
- |
3.3846 |
198 |
0.0085 |
- |
- |
- |
- |
- |
3.4017 |
199 |
0.0273 |
- |
- |
- |
- |
- |
3.4188 |
200 |
0.0363 |
- |
- |
- |
- |
- |
3.4359 |
201 |
0.0077 |
- |
- |
- |
- |
- |
3.4530 |
202 |
0.0046 |
- |
- |
- |
- |
- |
3.4701 |
203 |
0.0067 |
- |
- |
- |
- |
- |
3.4872 |
204 |
0.0054 |
- |
- |
- |
- |
- |
3.5043 |
205 |
0.0055 |
- |
- |
- |
- |
- |
3.5214 |
206 |
0.0052 |
- |
- |
- |
- |
- |
3.5385 |
207 |
0.004 |
- |
- |
- |
- |
- |
3.5556 |
208 |
0.0102 |
- |
- |
- |
- |
- |
3.5726 |
209 |
0.0228 |
- |
- |
- |
- |
- |
3.5897 |
210 |
0.0315 |
- |
- |
- |
- |
- |
3.6068 |
211 |
0.0095 |
- |
- |
- |
- |
- |
3.6239 |
212 |
0.0069 |
- |
- |
- |
- |
- |
3.6410 |
213 |
0.0066 |
- |
- |
- |
- |
- |
3.6581 |
214 |
0.0395 |
- |
- |
- |
- |
- |
3.6752 |
215 |
0.0176 |
- |
- |
- |
- |
- |
3.6923 |
216 |
0.0156 |
- |
- |
- |
- |
- |
3.7094 |
217 |
0.0168 |
- |
- |
- |
- |
- |
3.7265 |
218 |
0.0376 |
- |
- |
- |
- |
- |
3.7436 |
219 |
0.0149 |
- |
- |
- |
- |
- |
3.7607 |
220 |
0.0179 |
- |
- |
- |
- |
- |
3.7778 |
221 |
0.0059 |
- |
- |
- |
- |
- |
3.7949 |
222 |
0.013 |
- |
- |
- |
- |
- |
3.8120 |
223 |
0.0081 |
- |
- |
- |
- |
- |
3.8291 |
224 |
0.0136 |
- |
- |
- |
- |
- |
3.8462 |
225 |
0.0129 |
- |
- |
- |
- |
- |
3.8632 |
226 |
0.0132 |
- |
- |
- |
- |
- |
3.8803 |
227 |
0.0228 |
- |
- |
- |
- |
- |
3.8974 |
228 |
0.0091 |
- |
- |
- |
- |
- |
3.9145 |
229 |
0.0112 |
- |
- |
- |
- |
- |
3.9316 |
230 |
0.0124 |
- |
- |
- |
- |
- |
3.9487 |
231 |
0.0224 |
- |
- |
- |
- |
- |
3.9658 |
232 |
0.0191 |
- |
- |
- |
- |
- |
3.9829 |
233 |
0.0078 |
- |
- |
- |
- |
- |
4.0 |
234 |
0.0145 |
0.3959 |
0.411 |
0.4154 |
0.3741 |
0.4179 |
4.0171 |
235 |
0.0089 |
- |
- |
- |
- |
- |
4.0342 |
236 |
0.0157 |
- |
- |
- |
- |
- |
4.0513 |
237 |
0.019 |
- |
- |
- |
- |
- |
4.0684 |
238 |
0.0315 |
- |
- |
- |
- |
- |
4.0855 |
239 |
0.0311 |
- |
- |
- |
- |
- |
4.1026 |
240 |
0.0155 |
- |
- |
- |
- |
- |
4.1197 |
241 |
0.0078 |
- |
- |
- |
- |
- |
4.1368 |
242 |
0.0069 |
- |
- |
- |
- |
- |
4.1538 |
243 |
0.0246 |
- |
- |
- |
- |
- |
4.1709 |
244 |
0.011 |
- |
- |
- |
- |
- |
4.1880 |
245 |
0.0169 |
- |
- |
- |
- |
- |
4.2051 |
246 |
0.0065 |
- |
- |
- |
- |
- |
4.2222 |
247 |
0.0093 |
- |
- |
- |
- |
- |
4.2393 |
248 |
0.0059 |
- |
- |
- |
- |
- |
4.2564 |
249 |
0.0072 |
- |
- |
- |
- |
- |
4.2735 |
250 |
0.0114 |
- |
- |
- |
- |
- |
4.2906 |
251 |
0.0048 |
- |
- |
- |
- |
- |
4.3077 |
252 |
0.0099 |
- |
- |
- |
- |
- |
4.3248 |
253 |
0.0061 |
- |
- |
- |
- |
- |
4.3419 |
254 |
0.005 |
- |
- |
- |
- |
- |
4.3590 |
255 |
0.0077 |
- |
- |
- |
- |
- |
4.3761 |
256 |
0.0057 |
- |
- |
- |
- |
- |
4.3932 |
257 |
0.0106 |
- |
- |
- |
- |
- |
4.4103 |
258 |
0.0176 |
- |
- |
- |
- |
- |
4.4274 |
259 |
0.0085 |
- |
- |
- |
- |
- |
4.4444 |
260 |
0.0059 |
- |
- |
- |
- |
- |
4.4615 |
261 |
0.0063 |
- |
- |
- |
- |
- |
4.4786 |
262 |
0.003 |
- |
- |
- |
- |
- |
4.4957 |
263 |
0.0041 |
- |
- |
- |
- |
- |
4.5128 |
264 |
0.0048 |
- |
- |
- |
- |
- |
4.5299 |
265 |
0.0037 |
- |
- |
- |
- |
- |
4.5470 |
266 |
0.0052 |
- |
- |
- |
- |
- |
4.5641 |
267 |
0.0084 |
- |
- |
- |
- |
- |
4.5812 |
268 |
0.0183 |
- |
- |
- |
- |
- |
4.5983 |
269 |
0.0065 |
- |
- |
- |
- |
- |
4.6154 |
270 |
0.0074 |
- |
- |
- |
- |
- |
4.6325 |
271 |
0.0046 |
- |
- |
- |
- |
- |
4.6496 |
272 |
0.009 |
- |
- |
- |
- |
- |
4.6667 |
273 |
0.01 |
- |
- |
- |
- |
- |
4.6838 |
274 |
0.0158 |
- |
- |
- |
- |
- |
4.7009 |
275 |
0.0077 |
- |
- |
- |
- |
- |
4.7179 |
276 |
0.0259 |
- |
- |
- |
- |
- |
4.7350 |
277 |
0.0204 |
- |
- |
- |
- |
- |
4.7521 |
278 |
0.0155 |
- |
- |
- |
- |
- |
4.7692 |
279 |
0.0101 |
- |
- |
- |
- |
- |
4.7863 |
280 |
0.0062 |
- |
- |
- |
- |
- |
4.8034 |
281 |
0.0065 |
- |
- |
- |
- |
- |
4.8205 |
282 |
0.0115 |
- |
- |
- |
- |
- |
4.8376 |
283 |
0.0088 |
- |
- |
- |
- |
- |
4.8547 |
284 |
0.0157 |
- |
- |
- |
- |
- |
4.8718 |
285 |
0.0145 |
- |
- |
- |
- |
- |
4.8889 |
286 |
0.0122 |
- |
- |
- |
- |
- |
4.9060 |
287 |
0.007 |
- |
- |
- |
- |
- |
4.9231 |
288 |
0.0126 |
- |
- |
- |
- |
- |
4.9402 |
289 |
0.0094 |
- |
- |
- |
- |
- |
4.9573 |
290 |
0.016 |
0.3940 |
0.4131 |
0.4148 |
0.3728 |
0.4182 |
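- The bold row denotes the saved checkpoint. Bold formatting is not visible in this rendering; the final row (epoch 4.9573, step 290) is the one whose cosine_map@100 values match the scores reported in the Evaluation section.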
Framework Versions
- Python: 3.12.2
- Sentence Transformers: 3.0.0
- Transformers: 4.41.2
- PyTorch: 2.3.1
- Accelerate: 0.27.2
- Datasets: 2.19.1
- Tokenizers: 0.19.1
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
MatryoshkaLoss
@misc{kusupati2024matryoshka,
title={Matryoshka Representation Learning},
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
year={2024},
eprint={2205.13147},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
MultipleNegativesRankingLoss
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}