Complete Sentence Transformers integration + patch inference on CPU & Windows (#4)
- Remove reference_compile; complete Sentence Transformers support (5c66f04b1ffb7a7db1a480361c8a574deed2e3b7)
- Notify users that flash_attn is recommended (b94cf64d6cb8ea2e1746f7e6df169b8300804035)
- README.md +29 -10
- config.json +0 -1
- config_sentence_transformers.json +10 -0
- modules.json +14 -0
README.md
CHANGED

@@ -6,6 +6,8 @@ base_model:
 - answerdotai/ModernBERT-base
 pipeline_tag: sentence-similarity
 library_name: transformers
+tags:
+- sentence-transformers
 ---
 
 # gte-modernbert-base
@@ -33,10 +35,17 @@ The `gte-modernbert` models demonstrates competitive performance in several text
 
 ## Usage
 
-
+> [!TIP]
+> For `transformers` and `sentence-transformers`, if your GPU supports it, the efficient Flash Attention 2 will be used automatically if you have `flash_attn` installed. It is not mandatory.
+>
+> ```bash
+> pip install flash_attn
+> ```
+
+Use with `transformers`
 
 ```python
-# Requires transformers>=4.
+# Requires transformers>=4.48.0
 
 import torch.nn.functional as F
 from transformers import AutoModel, AutoTokenizer
@@ -48,9 +57,9 @@ input_texts = [
     "sorting algorithms"
 ]
 
-model_path =
+model_path = "Alibaba-NLP/gte-modernbert-base"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModel.from_pretrained(model_path
+model = AutoModel.from_pretrained(model_path)
 
 # Tokenize the input texts
 batch_dict = tokenizer(input_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt')
@@ -62,21 +71,31 @@ embeddings = outputs.last_hidden_state[:, 0]
 embeddings = F.normalize(embeddings, p=2, dim=1)
 scores = (embeddings[:1] @ embeddings[1:].T) * 100
 print(scores.tolist())
+# [[42.89073944091797, 71.30911254882812, 33.664554595947266]]
 ```
 
 Use with `sentence-transformers`:
 
 ```python
-# Requires
-
+# Requires transformers>=4.48.0
 from sentence_transformers import SentenceTransformer
 from sentence_transformers.util import cos_sim
 
-
+input_texts = [
+    "what is the capital of China?",
+    "how to implement quick sort in python?",
+    "Beijing",
+    "sorting algorithms"
+]
+
+model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
+embeddings = model.encode(input_texts)
+print(embeddings.shape)
+# (4, 768)
 
-
-
-
+similarities = cos_sim(embeddings[0], embeddings[1:])
+print(similarities)
+# tensor([[0.4289, 0.7131, 0.3366]])
 ```
 
 Use with `transformers.js`:
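The new tip leaves attention-backend selection to auto-detection. If you would rather pin it explicitly, `transformers` exposes the `attn_implementation` argument on `from_pretrained`; below is a minimal sketch, where the `sdpa` fallback and the fp16 dtype are choices of this example, not part of the PR:

```python
import torch
from transformers import AutoModel

model_path = "Alibaba-NLP/gte-modernbert-base"

if torch.cuda.is_available():
    try:
        # Flash Attention 2 needs a supported GPU, flash_attn installed,
        # and half-precision weights (fp16 or bf16).
        model = AutoModel.from_pretrained(
            model_path,
            attn_implementation="flash_attention_2",
            torch_dtype=torch.float16,
        ).cuda()
    except (ImportError, ValueError):
        # flash_attn missing or unsupported: fall back to PyTorch's built-in SDPA kernels
        model = AutoModel.from_pretrained(model_path, attn_implementation="sdpa").cuda()
else:
    # CPU (including Windows): SDPA works with no extra dependencies
    model = AutoModel.from_pretrained(model_path, attn_implementation="sdpa")
```

For `sentence-transformers`, the same kwargs can be forwarded via `SentenceTransformer(..., model_kwargs={...})`. Note also that the two README snippets agree with each other: since the embeddings are L2-normalized, the `(embeddings[:1] @ embeddings[1:].T) * 100` scores in the `transformers` example are the cosine similarities from the `sentence-transformers` example scaled by 100 (42.89 ↔ 0.4289, and so on).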
config.json
CHANGED

@@ -35,7 +35,6 @@
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
-  "reference_compile": true,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
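For context on the "patch inference on CPU & Windows" part of this PR: with `"reference_compile": true` in `config.json`, ModernBERT attempts to `torch.compile` parts of the model at load time, which fails on setups without a working compile backend; dropping the key restores the default behavior. On an older snapshot that still carries the key, the value can also be overridden per load, since `from_pretrained` kwargs it does not consume itself are applied to the config. A sketch, not part of this PR:

```python
from transformers import AutoModel

# Disable the torch.compile path even if the loaded snapshot still has
# "reference_compile": true in its config.json.
model = AutoModel.from_pretrained(
    "Alibaba-NLP/gte-modernbert-base",
    reference_compile=False,
)
```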
config_sentence_transformers.json
ADDED

@@ -0,0 +1,10 @@
+{
+  "__version__": {
+    "sentence_transformers": "2.7.0",
+    "transformers": "4.48.0",
+    "pytorch": "2.5.0+cu121"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}
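`"similarity_fn_name": "cosine"` is more than metadata: in recent `sentence-transformers` releases (v3+) it is loaded onto the model and drives `model.similarity(...)`, so the explicit `cos_sim` call in the README could be replaced by the configured function. A short sketch, reusing texts from the README (the printed value is taken from the README's numbers and may differ slightly):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
print(model.similarity_fn_name)  # "cosine", read from config_sentence_transformers.json

embeddings = model.encode(["what is the capital of China?", "Beijing"])
# model.similarity applies the configured similarity function (cosine here)
print(model.similarity(embeddings[:1], embeddings[1:]))  # ≈ tensor([[0.7131]])
```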
modules.json
ADDED

@@ -0,0 +1,14 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]
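This file is what lets `SentenceTransformer("Alibaba-NLP/gte-modernbert-base")` work: on load, the library stacks a Transformer module with a Pooling module whose settings live under `1_Pooling/`. Building the same pipeline by hand would look roughly like this sketch; `pooling_mode="cls"` is an assumption inferred from the README's use of `last_hidden_state[:, 0]`, since `1_Pooling/config.json` is not shown in this diff:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Pooling, Transformer

# Equivalent of what modules.json wires up on load
word_embedding_model = Transformer("Alibaba-NLP/gte-modernbert-base")
pooling_model = Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 768
    pooling_mode="cls",  # assumption: CLS pooling, matching last_hidden_state[:, 0]
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
```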