Update README.md
README.md CHANGED
@@ -68,7 +68,6 @@ cos_sim = sim(embeddings.unsqueeze(1),
                 embeddings.unsqueeze(0))
 
 print(f"Distance: {cos_sim[0,1].detach().item()}")
-
 ```
 
 ## Example 2) - Clustering
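The hunk above shows only the tail of Example 1, so it is easy to miss what `embeddings` and `sim` are. Below is a minimal, self-contained sketch of that computation. The loading code via `transformers.AutoModel`/`AutoTokenizer`, the use of the `[CLS]` token as the sentence embedding, and the toy sentences are assumptions added for illustration; only the last three lines mirror the snippet in the hunk.

```python
# Sketch of Example 1's similarity computation (assumptions noted above).
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sap-ai-research/miCSE")
model = AutoModel.from_pretrained("sap-ai-research/miCSE")

sentences = ["This is a sentence for testing miCSE.",
             "This is testing miCSE."]
batch = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    outputs = model(**batch)

# Assumption: take the [CLS] token representation as the sentence embedding,
# giving a tensor of shape [num_sentences, hidden_dim].
embeddings = outputs.last_hidden_state[:, 0]

# Broadcasting unsqueeze(1) against unsqueeze(0) yields the full
# [num_sentences, num_sentences] cosine-similarity matrix.
sim = nn.CosineSimilarity(dim=-1)
cos_sim = sim(embeddings.unsqueeze(1),
              embeddings.unsqueeze(0))

print(f"Distance: {cos_sim[0,1].detach().item()}")
```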
@@ -144,11 +143,47 @@ umap_model.fit(embeddings)
 
 # Plot result
 umap_plot.points(umap_model, labels = np.array(classes),theme='fire')
-
 ```
 
 ![UMAP Cluster](https://raw.githubusercontent.com/TJKlein/tjklein.github.io/master/images/miCSE_UMAP_small2.png)
 
+
+## Example 3) - Using [SentenceTransformers](https://www.sbert.net/)
+
+```python
+from sentence_transformers import SentenceTransformer, util
+from sentence_transformers import models
+import torch.nn as nn
+
+# Using the model with [CLS] embeddings
+model_name = 'sap-ai-research/miCSE'
+word_embedding_model = models.Transformer(model_name, max_seq_length=32)
+pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
+model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
+
+# Using cosine similarity as metric
+cos_sim = nn.CosineSimilarity(dim=-1)
+
+# List of sentences for comparison
+sentences_1 = ["This is a sentence for testing miCSE.",
+               "This is using mutual information Contrastive Sentence Embeddings model."]
+
+sentences_2 = ["This is testing miCSE.",
+               "Similarity with miCSE"]
+
+# Compute embeddings for both lists
+embeddings_1 = model.encode(sentences_1, convert_to_tensor=True)
+embeddings_2 = model.encode(sentences_2, convert_to_tensor=True)
+
+# Compute cosine similarities
+cosine_sim_scores = cos_sim(embeddings_1, embeddings_2)
+
+# Output of results
+for i in range(len(sentences_1)):
+    print(f"Similarity {cosine_sim_scores[i]:.2f}: {sentences_1[i]} << vs. >> {sentences_2[i]}")
+```
+
+
 # Benchmark
 
 Model results on SentEval Benchmark:
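Similarly, the clustering hunk above contains only the last steps of Example 2 (fitting UMAP and plotting). The sketch below fills in the surrounding steps under stated assumptions: the toy sentences and class labels are placeholders, the embeddings are produced with the SentenceTransformer setup from Example 3, and plotting uses umap-learn's optional `umap.plot` module (requires the plotting extras); only the final fit and plot calls correspond to the hunk.

```python
# End-to-end sketch of Example 2 (toy data and embedding step are assumptions).
import numpy as np
import umap
import umap.plot as umap_plot
from sentence_transformers import SentenceTransformer, models

# Build the sentence encoder (same construction as in Example 3).
word_embedding_model = models.Transformer('sap-ai-research/miCSE', max_seq_length=32)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# Placeholder corpus with two hypothetical classes.
sentences = ["The stock market rallied today.",
             "Shares closed higher after strong earnings.",
             "The team won the championship game.",
             "The striker scored twice in the final."]
classes = ["finance", "finance", "sports", "sports"]

# Sentence embeddings as a NumPy array for UMAP.
embeddings = model.encode(sentences)

# Fit a 2-D UMAP projection of the embeddings.
umap_model = umap.UMAP(n_neighbors=2, min_dist=0.1, metric='cosine')
umap_model.fit(embeddings)

# Plot result
umap_plot.points(umap_model, labels=np.array(classes), theme='fire')
```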
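One detail of Example 3 worth flagging: its comment refers to `[CLS]` embeddings, while `models.Pooling` defaults to mean pooling over token embeddings. If CLS pooling is actually intended, sentence-transformers supports it through explicit pooling flags; the variant below is an assumption about the intended configuration, not something stated in this diff.

```python
from sentence_transformers import SentenceTransformer, models

word_embedding_model = models.Transformer('sap-ai-research/miCSE', max_seq_length=32)
# Assumption: pool with the [CLS] token instead of the default mean pooling.
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                               pooling_mode_cls_token=True,
                               pooling_mode_mean_tokens=False)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
```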
@@ -160,7 +195,6 @@ Model results on SentEval Benchmark:
 +-------+-------+-------+-------+-------+--------------+-----------------+--------+
 ```
 
-
 ## Citations
 If you use this code in your research or want to refer to our work, please cite:
 