philipp-zettl
/

MiniLM-similarity-small

@@ -6,7 +6,7 @@ tags:
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
-- dataset_size:844
 - loss:CoSENTLoss
 base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
 datasets: []
@@ -22,30 +22,30 @@ metrics:
 - pearson_max
 - spearman_max
 widget:
-- source_sentence: Help fix a problem with my device’s battery life
   sentences:
-  - order query
   - faq query
-  - technical support query
-- source_sentence: 订购一双运动鞋
   sentences:
-  - service request
   - feedback query
-  - product query
-- source_sentence: 告诉我如何更改我的密码
   sentences:
   - support query
   - product query
   - faq query
-- source_sentence: Get information on the next local festival
   sentences:
-  - event inquiry
-  - service request
   - account query
-- source_sentence: Change the currency for my payment
-  sentences:
-  - product query
-  - payment query
   - faq query
 pipeline_tag: sentence-similarity
 model-index:
@@ -59,34 +59,34 @@ model-index:
       type: MiniLM-dev
     metrics:
     - type: pearson_cosine
-      value: 0.7356955662825808
       name: Pearson Cosine
     - type: spearman_cosine
-      value: 0.7320761390174187
       name: Spearman Cosine
     - type: pearson_manhattan
-      value: 0.6240041985776243
       name: Pearson Manhattan
     - type: spearman_manhattan
-      value: 0.6179783414452009
       name: Spearman Manhattan
     - type: pearson_euclidean
-      value: 0.6321466982201008
       name: Pearson Euclidean
     - type: spearman_euclidean
-      value: 0.6296964936282937
       name: Spearman Euclidean
     - type: pearson_dot
-      value: 0.7491168439451736
       name: Pearson Dot
     - type: spearman_dot
-      value: 0.7592129124940543
       name: Spearman Dot
     - type: pearson_max
-      value: 0.7491168439451736
       name: Pearson Max
     - type: spearman_max
-      value: 0.7592129124940543
       name: Spearman Max
   - task:
       type: semantic-similarity
@@ -96,34 +96,34 @@ model-index:
       type: MiniLM-test
     metrics:
     - type: pearson_cosine
-      value: 0.7687106130417081
       name: Pearson Cosine
     - type: spearman_cosine
-      value: 0.7552108666502075
       name: Spearman Cosine
     - type: pearson_manhattan
-      value: 0.7462708006775693
       name: Pearson Manhattan
     - type: spearman_manhattan
-      value: 0.7365483246407295
       name: Spearman Manhattan
     - type: pearson_euclidean
-      value: 0.7545194410402545
       name: Pearson Euclidean
     - type: spearman_euclidean
-      value: 0.7465016803791179
       name: Spearman Euclidean
     - type: pearson_dot
-      value: 0.7251488155932073
       name: Pearson Dot
     - type: spearman_dot
-      value: 0.7390366635753267
       name: Spearman Dot
     - type: pearson_max
-      value: 0.7687106130417081
       name: Pearson Max
     - type: spearman_max
-      value: 0.7552108666502075
       name: Spearman Max
 ---
@@ -176,9 +176,9 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("philipp-zettl/MiniLM-similarity-small")
 # Run inference
 sentences = [
-    'Change the currency for my payment',
-    'payment query',
     'faq query',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
@@ -224,16 +224,16 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| pearson_cosine      | 0.7357     |
-| **spearman_cosine** | **0.7321** |
-| pearson_manhattan   | 0.624      |
-| spearman_manhattan  | 0.618      |
-| pearson_euclidean   | 0.6321     |
-| spearman_euclidean  | 0.6297     |
-| pearson_dot         | 0.7491     |
-| spearman_dot        | 0.7592     |
-| pearson_max         | 0.7491     |
-| spearman_max        | 0.7592     |
 #### Semantic Similarity
 * Dataset: `MiniLM-test`
@@ -241,16 +241,16 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| pearson_cosine      | 0.7687     |
-| **spearman_cosine** | **0.7552** |
-| pearson_manhattan   | 0.7463     |
-| spearman_manhattan  | 0.7365     |
-| pearson_euclidean   | 0.7545     |
-| spearman_euclidean  | 0.7465     |
-| pearson_dot         | 0.7251     |
-| spearman_dot        | 0.739      |
-| pearson_max         | 0.7687     |
-| spearman_max        | 0.7552     |
 <!--
 ## Bias, Risks and Limitations
@@ -271,19 +271,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 844 training samples
 * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | sentence1                                                                        | sentence2                                                                       | score                                                          |
-  |:--------|:---------------------------------------------------------------------------------|:--------------------------------------------------------------------------------|:---------------------------------------------------------------|
-  | type    | string                                                                           | string                                                                          | float                                                          |
-  | details | <ul><li>min: 6 tokens</li><li>mean: 10.8 tokens</li><li>max: 19 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 5.33 tokens</li><li>max: 6 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.49</li><li>max: 1.0</li></ul> |
 * Samples:
-  | sentence1                                                       | sentence2                  | score            |
-  |:----------------------------------------------------------------|:---------------------------|:-----------------|
-  | <code>Update the payment method for my order</code>             | <code>order query</code>   | <code>1.0</code> |
-  | <code>Не могу установить новое обновление, помогите!</code>     | <code>support query</code> | <code>1.0</code> |
-  | <code>Помогите мне изменить настройки конфиденциальности</code> | <code>support query</code> | <code>1.0</code> |
 * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
   ```json
   {
@@ -297,19 +297,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 106 evaluation samples
 * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
 * Approximate statistics based on the first 1000 samples:
   |         | sentence1                                                                         | sentence2                                                                       | score                                                          |
   |:--------|:----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------|:---------------------------------------------------------------|
   | type    | string                                                                            | string                                                                          | float                                                          |
-  | details | <ul><li>min: 6 tokens</li><li>mean: 10.79 tokens</li><li>max: 15 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 5.27 tokens</li><li>max: 6 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.51</li><li>max: 1.0</li></ul> |
 * Samples:
-  | sentence1                                                       | sentence2                            | score            |
-  |:----------------------------------------------------------------|:-------------------------------------|:-----------------|
-  | <code>帮我修复系统错误</code>                                           | <code>support query</code>           | <code>1.0</code> |
-  | <code>Je veux commander une pizza</code>                        | <code>product query</code>           | <code>1.0</code> |
-  | <code>Fix problems with my device’s Bluetooth connection</code> | <code>technical support query</code> | <code>1.0</code> |
 * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
   ```json
   {
@@ -445,28 +445,38 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | loss   | MiniLM-dev_spearman_cosine | MiniLM-test_spearman_cosine |
 |:------:|:----:|:-------------:|:------:|:--------------------------:|:---------------------------:|
-| 0.0943 | 10   | 4.0771        | 2.2054 | 0.2529                     | -                           |
-| 0.1887 | 20   | 4.4668        | 1.8221 | 0.3528                     | -                           |
-| 0.2830 | 30   | 2.5459        | 1.5545 | 0.4638                     | -                           |
-| 0.3774 | 40   | 2.1926        | 1.3145 | 0.5569                     | -                           |
-| 0.4717 | 50   | 0.9001        | 1.1653 | 0.6285                     | -                           |
-| 0.5660 | 60   | 1.4049        | 1.0734 | 0.6834                     | -                           |
-| 0.6604 | 70   | 0.7204        | 0.9951 | 0.6988                     | -                           |
-| 0.7547 | 80   | 1.4023        | 1.1213 | 0.6945                     | -                           |
-| 0.8491 | 90   | 0.2315        | 1.2931 | 0.6414                     | -                           |
-| 0.9434 | 100  | 0.0018        | 1.3904 | 0.6180                     | -                           |
-| 1.0377 | 110  | 0.0494        | 1.2889 | 0.6322                     | -                           |
-| 1.1321 | 120  | 0.3156        | 1.2461 | 0.6402                     | -                           |
-| 1.2264 | 130  | 1.8153        | 1.0844 | 0.6716                     | -                           |
-| 1.3208 | 140  | 0.2638        | 0.9939 | 0.6957                     | -                           |
-| 1.4151 | 150  | 0.5454        | 0.9545 | 0.7056                     | -                           |
-| 1.5094 | 160  | 0.3421        | 0.9699 | 0.7062                     | -                           |
-| 1.6038 | 170  | 0.0035        | 0.9521 | 0.7093                     | -                           |
-| 1.6981 | 180  | 0.0401        | 0.8988 | 0.7160                     | -                           |
-| 1.7925 | 190  | 0.8138        | 0.8619 | 0.7271                     | -                           |
-| 1.8868 | 200  | 0.0236        | 0.8449 | 0.7315                     | -                           |
-| 1.9811 | 210  | 0.0012        | 0.8438 | 0.7321                     | -                           |
-| 2.0    | 212  | -             | -      | -                          | 0.7552                      |
 ### Framework Versions

 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
+- dataset_size:1267
 - loss:CoSENTLoss
 base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
 datasets: []
 - pearson_max
 - spearman_max
 widget:
+- source_sentence: Give me suggestions for a high-quality DSLR camera
   sentences:
   - faq query
+  - subscription query
+  - faq query
+- source_sentence: Aidez-moi à configurer une nouvelle adresse e-mail
   sentences:
+  - order query
+  - faq query
   - feedback query
+- source_sentence: Как я могу изменить адрес доставки?
   sentences:
   - support query
   - product query
+  - product query
+- source_sentence: ساعدني في حذف الملفات الغير مرغوب فيها من هاتفي
+  sentences:
+  - technical support query
+  - product recommendation
   - faq query
+- source_sentence: Envoyez-moi la politique de garantie de ce produit
   sentences:
+  - faq query
   - account query
   - faq query
 pipeline_tag: sentence-similarity
 model-index:
       type: MiniLM-dev
     metrics:
     - type: pearson_cosine
+      value: 0.6538226572138826
       name: Pearson Cosine
     - type: spearman_cosine
+      value: 0.6336766646599241
       name: Spearman Cosine
     - type: pearson_manhattan
+      value: 0.5799895241429639
       name: Pearson Manhattan
     - type: spearman_manhattan
+      value: 0.5525776786782183
       name: Spearman Manhattan
     - type: pearson_euclidean
+      value: 0.5732001104236694
       name: Pearson Euclidean
     - type: spearman_euclidean
+      value: 0.5394971970682657
       name: Spearman Euclidean
     - type: pearson_dot
+      value: 0.6359725423136287
       name: Pearson Dot
     - type: spearman_dot
+      value: 0.6237936341101822
       name: Spearman Dot
     - type: pearson_max
+      value: 0.6538226572138826
       name: Pearson Max
     - type: spearman_max
+      value: 0.6336766646599241
       name: Spearman Max
   - task:
       type: semantic-similarity
       type: MiniLM-test
     metrics:
     - type: pearson_cosine
+      value: 0.6682368113711722
       name: Pearson Cosine
     - type: spearman_cosine
+      value: 0.6222011918428743
       name: Spearman Cosine
     - type: pearson_manhattan
+      value: 0.5714617063306076
       name: Pearson Manhattan
     - type: spearman_manhattan
+      value: 0.5481366191719228
       name: Spearman Manhattan
     - type: pearson_euclidean
+      value: 0.5726946277850402
       name: Pearson Euclidean
     - type: spearman_euclidean
+      value: 0.549312247309557
       name: Spearman Euclidean
     - type: pearson_dot
+      value: 0.6396412507506479
       name: Pearson Dot
     - type: spearman_dot
+      value: 0.6107388175009413
       name: Spearman Dot
     - type: pearson_max
+      value: 0.6682368113711722
       name: Pearson Max
     - type: spearman_max
+      value: 0.6222011918428743
       name: Spearman Max
 ---
 model = SentenceTransformer("philipp-zettl/MiniLM-similarity-small")
 # Run inference
 sentences = [
+    'Envoyez-moi la politique de garantie de ce produit',
     'faq query',
+    'account query',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
 | Metric              | Value      |
 |:--------------------|:-----------|
+| pearson_cosine      | 0.6538     |
+| **spearman_cosine** | **0.6337** |
+| pearson_manhattan   | 0.58       |
+| spearman_manhattan  | 0.5526     |
+| pearson_euclidean   | 0.5732     |
+| spearman_euclidean  | 0.5395     |
+| pearson_dot         | 0.636      |
+| spearman_dot        | 0.6238     |
+| pearson_max         | 0.6538     |
+| spearman_max        | 0.6337     |
 #### Semantic Similarity
 * Dataset: `MiniLM-test`
 | Metric              | Value      |
 |:--------------------|:-----------|
+| pearson_cosine      | 0.6682     |
+| **spearman_cosine** | **0.6222** |
+| pearson_manhattan   | 0.5715     |
+| spearman_manhattan  | 0.5481     |
+| pearson_euclidean   | 0.5727     |
+| spearman_euclidean  | 0.5493     |
+| pearson_dot         | 0.6396     |
+| spearman_dot        | 0.6107     |
+| pearson_max         | 0.6682     |
+| spearman_max        | 0.6222     |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 1,267 training samples
 * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | sentence1                                                                         | sentence2                                                                       | score                                                          |
+  |:--------|:----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------|:---------------------------------------------------------------|
+  | type    | string                                                                            | string                                                                          | float                                                          |
+  | details | <ul><li>min: 6 tokens</li><li>mean: 10.77 tokens</li><li>max: 18 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 5.31 tokens</li><li>max: 6 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.67</li><li>max: 1.0</li></ul> |
 * Samples:
+  | sentence1                                                     | sentence2                  | score            |
+  |:--------------------------------------------------------------|:---------------------------|:-----------------|
+  | <code>Get information on the next art exhibition</code>       | <code>product query</code> | <code>0.0</code> |
+  | <code>Show me how to update my profile</code>                 | <code>product query</code> | <code>0.0</code> |
+  | <code>Покажите мне доступные варианты полетов в Турцию</code> | <code>faq query</code>     | <code>0.0</code> |
 * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
   ```json
   {
 #### Unnamed Dataset
+* Size: 159 evaluation samples
 * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
 * Approximate statistics based on the first 1000 samples:
   |         | sentence1                                                                         | sentence2                                                                       | score                                                          |
   |:--------|:----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------|:---------------------------------------------------------------|
   | type    | string                                                                            | string                                                                          | float                                                          |
+  | details | <ul><li>min: 6 tokens</li><li>mean: 10.65 tokens</li><li>max: 17 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 5.35 tokens</li><li>max: 6 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.67</li><li>max: 1.0</li></ul> |
 * Samples:
+  | sentence1                                                      | sentence2                  | score            |
+  |:---------------------------------------------------------------|:---------------------------|:-----------------|
+  | <code>Sende mir die Bestellbestätigung per E-Mail</code>       | <code>order query</code>   | <code>0.0</code> |
+  | <code>How do I add a new payment method?</code>                | <code>faq query</code>     | <code>1.0</code> |
+  | <code>No puedo conectar mi impresora, ¿puedes ayudarme?</code> | <code>support query</code> | <code>1.0</code> |
 * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
   ```json
   {
 ### Training Logs
 | Epoch  | Step | Training Loss | loss   | MiniLM-dev_spearman_cosine | MiniLM-test_spearman_cosine |
 |:------:|:----:|:-------------:|:------:|:--------------------------:|:---------------------------:|
+| 0.0629 | 10   | 6.2479        | 2.5890 | 0.1448                     | -                           |
+| 0.1258 | 20   | 4.3549        | 2.2787 | 0.1965                     | -                           |
+| 0.1887 | 30   | 3.5969        | 2.0104 | 0.2599                     | -                           |
+| 0.2516 | 40   | 2.4979        | 1.7269 | 0.3357                     | -                           |
+| 0.3145 | 50   | 2.5551        | 1.5747 | 0.4439                     | -                           |
+| 0.3774 | 60   | 3.1446        | 1.4892 | 0.4750                     | -                           |
+| 0.4403 | 70   | 2.1353        | 1.5305 | 0.4662                     | -                           |
+| 0.5031 | 80   | 2.9341        | 1.3718 | 0.4848                     | -                           |
+| 0.5660 | 90   | 2.8709        | 1.2469 | 0.5316                     | -                           |
+| 0.6289 | 100  | 2.1367        | 1.2558 | 0.5436                     | -                           |
+| 0.6918 | 110  | 2.2735        | 1.2939 | 0.5392                     | -                           |
+| 0.7547 | 120  | 2.8646        | 1.1206 | 0.5616                     | -                           |
+| 0.8176 | 130  | 3.3204        | 1.0213 | 0.5662                     | -                           |
+| 0.8805 | 140  | 0.8989        | 0.9866 | 0.5738                     | -                           |
+| 0.9434 | 150  | 0.0057        | 0.9961 | 0.5674                     | -                           |
+| 1.0063 | 160  | 0.0019        | 1.0111 | 0.5674                     | -                           |
+| 1.0692 | 170  | 0.4617        | 1.0275 | 0.5747                     | -                           |
+| 1.1321 | 180  | 0.0083        | 1.0746 | 0.5732                     | -                           |
+| 1.1950 | 190  | 0.5048        | 1.0968 | 0.5753                     | -                           |
+| 1.2579 | 200  | 0.0002        | 1.0840 | 0.5738                     | -                           |
+| 1.3208 | 210  | 0.07          | 1.0364 | 0.5753                     | -                           |
+| 1.3836 | 220  | 0.0           | 0.9952 | 0.5750                     | -                           |
+| 1.4465 | 230  | 0.0           | 0.9922 | 0.5744                     | -                           |
+| 1.5094 | 240  | 0.0           | 0.9923 | 0.5726                     | -                           |
+| 1.0126 | 250  | 0.229         | 0.9930 | 0.5729                     | -                           |
+| 1.0755 | 260  | 2.2061        | 0.9435 | 0.5880                     | -                           |
+| 1.1384 | 270  | 2.7711        | 0.8892 | 0.6078                     | -                           |
+| 1.2013 | 280  | 0.7528        | 0.8886 | 0.6148                     | -                           |
+| 1.2642 | 290  | 0.386         | 0.8927 | 0.6162                     | -                           |
+| 1.3270 | 300  | 0.8902        | 0.8710 | 0.6267                     | -                           |
+| 1.3899 | 310  | 0.9534        | 0.8429 | 0.6337                     | -                           |
+| 1.4403 | 318  | -             | -      | -                          | 0.6222                      |
 ### Framework Versions

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:944d69b0e22c70edbadcb4a35df9b7c8243f8601d9962798cbea41342b1c6406
 size 470637416

 version https://git-lfs.github.com/spec/v1
+oid sha256:a480f8a3b0abde34feef318b982835792b5781f388c0cbeb144e8d54ef77f2a3
 size 470637416