Tom Aarsen
committed on
Commit
•
7260df6
1
Parent(s):
7e90d35
Add ST-specific configuration files with model.save()
Browse files
- 1_Pooling/config.json +10 -0
- README.md +7 -4
- config_sentence_transformers.json +10 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 4096,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": true,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
---
|
2 |
tags:
|
3 |
- mteb
|
|
|
4 |
model-index:
|
5 |
- name: Salesforce/SFR-Embedding-2_R
|
6 |
results:
|
@@ -2007,11 +2008,13 @@ print(scores.tolist())
|
|
2007 |
|
2008 |
### Sentence Transformers
|
2009 |
```python
|
2010 |
-
|
2011 |
-
from sentence_transformers import SentenceTransformer, util
|
2012 |
|
2013 |
model = SentenceTransformer("Salesforce/SFR-Embedding-2_R")
|
2014 |
|
|
|
|
|
|
|
2015 |
def get_detailed_instruct(task_description: str, query: str) -> str:
|
2016 |
return f'Instruct: {task_description}\nQuery: {query}'
|
2017 |
|
@@ -2028,9 +2031,9 @@ passages = [
|
|
2028 |
]
|
2029 |
|
2030 |
embeddings = model.encode(queries + passages)
|
2031 |
-
scores =
|
2032 |
print(scores.tolist())
|
2033 |
-
# [[40.
|
2034 |
```
|
2035 |
|
2036 |
|
|
|
1 |
---
|
2 |
tags:
|
3 |
- mteb
|
4 |
+
- sentence-transformers
|
5 |
model-index:
|
6 |
- name: Salesforce/SFR-Embedding-2_R
|
7 |
results:
|
|
|
2008 |
|
2009 |
### Sentence Transformers
|
2010 |
```python
|
2011 |
+
from sentence_transformers import SentenceTransformer
|
|
|
2012 |
|
2013 |
model = SentenceTransformer("Salesforce/SFR-Embedding-2_R")
|
2014 |
|
2015 |
+
# Reduce the max length if desired
|
2016 |
+
model.max_seq_length = 4096
|
2017 |
+
|
2018 |
def get_detailed_instruct(task_description: str, query: str) -> str:
|
2019 |
return f'Instruct: {task_description}\nQuery: {query}'
|
2020 |
|
|
|
2031 |
]
|
2032 |
|
2033 |
embeddings = model.encode(queries + passages)
|
2034 |
+
scores = model.similarity(embeddings[:2], embeddings[2:]) * 100
|
2035 |
print(scores.tolist())
|
2036 |
+
# [[40.13203811645508, 25.032546997070312], [15.00684642791748, 39.937339782714844]]
|
2037 |
```
|
2038 |
|
2039 |
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.41.2",
|
5 |
+
"pytorch": "2.3.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 32768,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|