Upload folder using huggingface_hub

Browse files

Files changed (5)

final/README.md +116 -116
final/config.json +1 -1
final/model.safetensors +2 -2
runs/Jun03_21-23-40_ruche-gpu18.cluster/events.out.tfevents.1717442695.ruche-gpu18.cluster.1785.0 +3 -0
runs/Jun03_21-55-04_ruche-gpu18.cluster/events.out.tfevents.1717444545.ruche-gpu18.cluster.20850.0 +3 -0

final/README.md CHANGED

@@ -7,7 +7,7 @@ tags:
 - sentence-similarity
 - feature-extraction
 - dataset_size:100K<n<1M
-- loss:MultipleNegativesRankingLoss
 base_model: FacebookAI/xlm-roberta-large
 metrics:
 - cosine_accuracy
@@ -18,29 +18,29 @@ metrics:
 widget:
 - source_sentence: The boy scowls
   sentences:
-  - People are around a fire
-  - Boy playing baseball.
-  - The girls are at school.
-- source_sentence: an eagle flies
-  sentences:
-  - A man floats up a ladder.
-  - He is playing a song.
-  - The t-shirt is white.
 - source_sentence: A woman sings.
   sentences:
-  - The woman is outdoors.
-  - the animal is running
-  - A man is playing indoors.
 - source_sentence: A bird flying.
   sentences:
-  - No one is on a canoe.
-  - A man is on his feet.
-  - Two men listen to music.
 - source_sentence: There's a dock
   sentences:
-  - The man is performing.
-  - Five people on a path
-  - The elephant sits on a dog
 pipeline_tag: sentence-similarity
 model-index:
 - name: SentenceTransformer based on FacebookAI/xlm-roberta-large
@@ -53,19 +53,19 @@ model-index:
       type: all-nli-dev
     metrics:
     - type: cosine_accuracy
-      value: 0.452
       name: Cosine Accuracy
     - type: dot_accuracy
-      value: 0.34
       name: Dot Accuracy
     - type: manhattan_accuracy
-      value: 0.456
       name: Manhattan Accuracy
     - type: euclidean_accuracy
-      value: 0.452
       name: Euclidean Accuracy
     - type: max_accuracy
-      value: 0.456
       name: Max Accuracy
   - task:
       type: triplet
@@ -75,19 +75,19 @@ model-index:
       type: all-nli-test
     metrics:
     - type: cosine_accuracy
-      value: 0.481
       name: Cosine Accuracy
     - type: dot_accuracy
-      value: 0.364
       name: Dot Accuracy
     - type: manhattan_accuracy
-      value: 0.48
       name: Manhattan Accuracy
     - type: euclidean_accuracy
-      value: 0.481
       name: Euclidean Accuracy
     - type: max_accuracy
-      value: 0.481
       name: Max Accuracy
 ---
@@ -142,8 +142,8 @@ model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
     "There's a dock",
-    'The man is performing.',
-    'Five people on a path',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
@@ -189,11 +189,11 @@ You can finetune this model on your own dataset.
 | Metric             | Value     |
 |:-------------------|:----------|
-| cosine_accuracy    | 0.452     |
-| dot_accuracy       | 0.34      |
-| manhattan_accuracy | 0.456     |
-| euclidean_accuracy | 0.452     |
-| **max_accuracy**   | **0.456** |
 #### Triplet
 * Dataset: `all-nli-test`
@@ -201,11 +201,11 @@ You can finetune this model on your own dataset.
 | Metric             | Value     |
 |:-------------------|:----------|
-| cosine_accuracy    | 0.481     |
-| dot_accuracy       | 0.364     |
-| manhattan_accuracy | 0.48      |
-| euclidean_accuracy | 0.481     |
-| **max_accuracy**   | **0.481** |
 <!--
 ## Bias, Risks and Limitations
@@ -239,7 +239,7 @@ You can finetune this model on your own dataset.
   | <code>A person on a horse jumps over a broken down airplane.</code>        | <code>A person is outdoors, on a horse.</code>   | <code>A person is at a diner, ordering an omelette.</code> |
   | <code>Children smiling and waving at camera</code>                         | <code>There are children present</code>          | <code>The kids are frowning</code>                         |
   | <code>A boy is jumping on skateboard in the middle of a red bridge.</code> | <code>The boy does a skateboarding trick.</code> | <code>The boy skates down the sidewalk.</code>             |
-* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
       "scale": 20.0,
@@ -265,7 +265,7 @@ You can finetune this model on your own dataset.
   | <code>Two women are embracing while holding to go packages.</code>                                                                                                             | <code>Two woman are holding packages.</code>                | <code>The men are fighting outside a deli.</code>       |
   | <code>Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.</code> | <code>Two kids in numbered jerseys wash their hands.</code> | <code>Two kids in jackets walk to school.</code>        |
   | <code>A man selling donuts to a customer during a world exhibition event held in the city of Angeles</code>                                                                    | <code>A man selling donuts to a customer.</code>            | <code>A woman drinks her coffee in a small cafe.</code> |
-* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
       "scale": 20.0,
@@ -281,7 +281,7 @@ You can finetune this model on your own dataset.
 - `per_device_eval_batch_size`: 16
 - `num_train_epochs`: 1
 - `warmup_ratio`: 0.1
-- `fp16`: True
 - `batch_sampler`: no_duplicates
 #### All Hyperparameters
@@ -324,8 +324,8 @@ You can finetune this model on your own dataset.
 - `data_seed`: None
 - `jit_mode_eval`: False
 - `use_ipex`: False
-- `bf16`: False
-- `fp16`: True
 - `fp16_opt_level`: O1
 - `half_precision_backend`: auto
 - `bf16_full_eval`: False
@@ -401,70 +401,70 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch | Step | Training Loss | loss   | all-nli-dev_max_accuracy | all-nli-test_max_accuracy |
 |:-----:|:----:|:-------------:|:------:|:------------------------:|:-------------------------:|
-| 0     | 0    | -             | -      | 0.616                    | -                         |
-| 0.016 | 100  | 3.2768        | 1.8053 | 0.833                    | -                         |
-| 0.032 | 200  | 1.1697        | 1.2878 | 0.861                    | -                         |
-| 0.048 | 300  | 1.372         | 1.2466 | 0.861                    | -                         |
-| 0.064 | 400  | 1.0476        | 1.2291 | 0.863                    | -                         |
-| 0.08  | 500  | 0.8588        | 1.5259 | 0.838                    | -                         |
-| 0.096 | 600  | 2.9781        | 3.4309 | 0.463                    | -                         |
-| 0.112 | 700  | 3.4982        | 3.4309 | 0.457                    | -                         |
-| 0.128 | 800  | 3.467         | 3.4309 | 0.479                    | -                         |
-| 0.144 | 900  | 3.4665        | 3.4309 | 0.452                    | -                         |
-| 0.16  | 1000 | 3.4664        | 3.4309 | 0.477                    | -                         |
-| 0.176 | 1100 | 3.4663        | 3.4309 | 0.458                    | -                         |
-| 0.192 | 1200 | 3.4661        | 3.4309 | 0.462                    | -                         |
-| 0.208 | 1300 | 3.4658        | 3.4309 | 0.45                     | -                         |
-| 0.224 | 1400 | 3.4661        | 3.4309 | 0.481                    | -                         |
-| 0.24  | 1500 | 3.4877        | 3.4309 | 0.464                    | -                         |
-| 0.256 | 1600 | 3.4675        | 3.4309 | 0.462                    | -                         |
-| 0.272 | 1700 | 3.4665        | 3.4309 | 0.488                    | -                         |
-| 0.288 | 1800 | 3.4667        | 3.4309 | 0.492                    | -                         |
-| 0.304 | 1900 | 3.4664        | 3.4309 | 0.455                    | -                         |
-| 0.32  | 2000 | 3.4661        | 3.4309 | 0.453                    | -                         |
-| 0.336 | 2100 | 3.4666        | 3.4309 | 0.477                    | -                         |
-| 0.352 | 2200 | 3.4683        | 3.4309 | 0.48                     | -                         |
-| 0.368 | 2300 | 3.4663        | 3.4309 | 0.469                    | -                         |
-| 0.384 | 2400 | 3.4667        | 3.4309 | 0.448                    | -                         |
-| 0.4   | 2500 | 3.4669        | 3.4309 | 0.499                    | -                         |
-| 0.416 | 2600 | 3.4661        | 3.4309 | 0.453                    | -                         |
-| 0.432 | 2700 | 3.4656        | 3.4309 | 0.467                    | -                         |
-| 0.448 | 2800 | 3.4662        | 3.4309 | 0.507                    | -                         |
-| 0.464 | 2900 | 3.4902        | 3.4309 | 0.473                    | -                         |
-| 0.48  | 3000 | 3.4663        | 3.4309 | 0.469                    | -                         |
-| 0.496 | 3100 | 3.554         | 3.4309 | 0.46                     | -                         |
-| 0.512 | 3200 | 3.4664        | 3.4309 | 0.455                    | -                         |
-| 0.528 | 3300 | 3.4668        | 3.4309 | 0.46                     | -                         |
-| 0.544 | 3400 | 3.4661        | 3.4309 | 0.492                    | -                         |
-| 0.56  | 3500 | 3.4667        | 3.4309 | 0.432                    | -                         |
-| 0.576 | 3600 | 3.4668        | 3.4309 | 0.486                    | -                         |
-| 0.592 | 3700 | 3.4666        | 3.4309 | 0.469                    | -                         |
-| 0.608 | 3800 | 3.4669        | 3.4309 | 0.473                    | -                         |
-| 0.624 | 3900 | 3.4658        | 3.4309 | 0.487                    | -                         |
-| 0.64  | 4000 | 3.4663        | 3.4309 | 0.448                    | -                         |
-| 0.656 | 4100 | 3.4663        | 3.4309 | 0.465                    | -                         |
-| 0.672 | 4200 | 3.4664        | 3.4309 | 0.484                    | -                         |
-| 0.688 | 4300 | 3.4663        | 3.4309 | 0.469                    | -                         |
-| 0.704 | 4400 | 3.4661        | 3.4309 | 0.478                    | -                         |
-| 0.72  | 4500 | 3.4669        | 3.4309 | 0.467                    | -                         |
-| 0.736 | 4600 | 3.4664        | 3.4309 | 0.455                    | -                         |
-| 0.752 | 4700 | 3.4664        | 3.4309 | 0.481                    | -                         |
-| 0.768 | 4800 | 3.4659        | 3.4309 | 0.466                    | -                         |
-| 0.784 | 4900 | 3.466         | 3.4309 | 0.451                    | -                         |
-| 0.8   | 5000 | 3.466         | 3.4309 | 0.473                    | -                         |
-| 0.816 | 5100 | 3.4664        | 3.4309 | 0.44                     | -                         |
-| 0.832 | 5200 | 3.4658        | 3.4309 | 0.497                    | -                         |
-| 0.848 | 5300 | 3.4664        | 3.4309 | 0.474                    | -                         |
-| 0.864 | 5400 | 3.4658        | 3.4309 | 0.449                    | -                         |
-| 0.88  | 5500 | 3.4662        | 3.4309 | 0.466                    | -                         |
-| 0.896 | 5600 | 3.4663        | 3.4309 | 0.476                    | -                         |
-| 0.912 | 5700 | 3.4667        | 3.4309 | 0.455                    | -                         |
-| 0.928 | 5800 | 3.4669        | 3.4309 | 0.463                    | -                         |
-| 0.944 | 5900 | 3.4657        | 3.4309 | 0.467                    | -                         |
-| 0.96  | 6000 | 3.4671        | 3.4309 | 0.456                    | -                         |
-| 0.976 | 6100 | 2.9471        | 3.4309 | 0.484                    | -                         |
-| 0.992 | 6200 | 0.6929        | 3.4309 | 0.456                    | -                         |
-| 1.0   | 6250 | -             | -      | -                        | 0.481                     |
 ### Framework Versions
@@ -493,15 +493,15 @@ You can finetune this model on your own dataset.
 }
 ```
-#### MultipleNegativesRankingLoss
 ```bibtex
-@misc{henderson2017efficient,
-    title={Efficient Natural Language Response Suggestion for Smart Reply},
-    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
-    year={2017},
-    eprint={1705.00652},
     archivePrefix={arXiv},
-    primaryClass={cs.CL}
 }
 ```

 - sentence-similarity
 - feature-extraction
 - dataset_size:100K<n<1M
+- loss:CachedMultipleNegativesRankingLoss
 base_model: FacebookAI/xlm-roberta-large
 metrics:
 - cosine_accuracy
 widget:
 - source_sentence: The boy scowls
   sentences:
+  - The boy is outside.
+  - The man is in a city.
+  - A woman at home.
 - source_sentence: A woman sings.
   sentences:
+  - The woman is singing.
+  - a man is wearing blue
+  - The boys are eating.
 - source_sentence: A bird flying.
   sentences:
+  - A butterfly flys freely.
+  - She checks her phone.
+  - A man is sleeping.
+- source_sentence: an eagle flies
+  sentences:
+  - A butterfly flys freely.
+  - The men are together.
+  - A man is sleeping.
 - source_sentence: There's a dock
   sentences:
+  - There are people outdoors
+  - Boy playing baseball.
+  - A man is sleeping.
 pipeline_tag: sentence-similarity
 model-index:
 - name: SentenceTransformer based on FacebookAI/xlm-roberta-large
       type: all-nli-dev
     metrics:
     - type: cosine_accuracy
+      value: 0.941
       name: Cosine Accuracy
     - type: dot_accuracy
+      value: 0.062
       name: Dot Accuracy
     - type: manhattan_accuracy
+      value: 0.937
       name: Manhattan Accuracy
     - type: euclidean_accuracy
+      value: 0.938
       name: Euclidean Accuracy
     - type: max_accuracy
+      value: 0.941
       name: Max Accuracy
   - task:
       type: triplet
       type: all-nli-test
     metrics:
     - type: cosine_accuracy
+      value: 0.943
       name: Cosine Accuracy
     - type: dot_accuracy
+      value: 0.057
       name: Dot Accuracy
     - type: manhattan_accuracy
+      value: 0.947
       name: Manhattan Accuracy
     - type: euclidean_accuracy
+      value: 0.947
       name: Euclidean Accuracy
     - type: max_accuracy
+      value: 0.947
       name: Max Accuracy
 ---
 # Run inference
 sentences = [
     "There's a dock",
+    'There are people outdoors',
+    'Boy playing baseball.',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
 | Metric             | Value     |
 |:-------------------|:----------|
+| cosine_accuracy    | 0.941     |
+| dot_accuracy       | 0.062     |
+| manhattan_accuracy | 0.937     |
+| euclidean_accuracy | 0.938     |
+| **max_accuracy**   | **0.941** |
 #### Triplet
 * Dataset: `all-nli-test`
 | Metric             | Value     |
 |:-------------------|:----------|
+| cosine_accuracy    | 0.943     |
+| dot_accuracy       | 0.057     |
+| manhattan_accuracy | 0.947     |
+| euclidean_accuracy | 0.947     |
+| **max_accuracy**   | **0.947** |
 <!--
 ## Bias, Risks and Limitations
   | <code>A person on a horse jumps over a broken down airplane.</code>        | <code>A person is outdoors, on a horse.</code>   | <code>A person is at a diner, ordering an omelette.</code> |
   | <code>Children smiling and waving at camera</code>                         | <code>There are children present</code>          | <code>The kids are frowning</code>                         |
   | <code>A boy is jumping on skateboard in the middle of a red bridge.</code> | <code>The boy does a skateboarding trick.</code> | <code>The boy skates down the sidewalk.</code>             |
+* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
   ```json
   {
       "scale": 20.0,
   | <code>Two women are embracing while holding to go packages.</code>                                                                                                             | <code>Two woman are holding packages.</code>                | <code>The men are fighting outside a deli.</code>       |
   | <code>Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.</code> | <code>Two kids in numbered jerseys wash their hands.</code> | <code>Two kids in jackets walk to school.</code>        |
   | <code>A man selling donuts to a customer during a world exhibition event held in the city of Angeles</code>                                                                    | <code>A man selling donuts to a customer.</code>            | <code>A woman drinks her coffee in a small cafe.</code> |
+* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
   ```json
   {
       "scale": 20.0,
 - `per_device_eval_batch_size`: 16
 - `num_train_epochs`: 1
 - `warmup_ratio`: 0.1
+- `bf16`: True
 - `batch_sampler`: no_duplicates
 #### All Hyperparameters
 - `data_seed`: None
 - `jit_mode_eval`: False
 - `use_ipex`: False
+- `bf16`: True
+- `fp16`: False
 - `fp16_opt_level`: O1
 - `half_precision_backend`: auto
 - `bf16_full_eval`: False
 ### Training Logs
 | Epoch | Step | Training Loss | loss   | all-nli-dev_max_accuracy | all-nli-test_max_accuracy |
 |:-----:|:----:|:-------------:|:------:|:------------------------:|:-------------------------:|
+| 0     | 0    | -             | -      | 0.613                    | -                         |
+| 0.016 | 100  | 3.4639        | 3.4199 | 0.621                    | -                         |
+| 0.032 | 200  | 3.4496        | 3.1967 | 0.841                    | -                         |
+| 0.048 | 300  | 2.2928        | 1.0476 | 0.864                    | -                         |
+| 0.064 | 400  | 1.2217        | 0.9993 | 0.871                    | -                         |
+| 0.08  | 500  | 1.1075        | 1.2674 | 0.85                     | -                         |
+| 0.096 | 600  | 1.2113        | 1.2565 | 0.866                    | -                         |
+| 0.112 | 700  | 1.0326        | 1.3313 | 0.855                    | -                         |
+| 0.128 | 800  | 1.2326        | 1.3698 | 0.851                    | -                         |
+| 0.144 | 900  | 1.2897        | 1.2690 | 0.855                    | -                         |
+| 0.16  | 1000 | 1.275         | 1.1231 | 0.863                    | -                         |
+| 0.176 | 1100 | 1.0823        | 1.2453 | 0.853                    | -                         |
+| 0.192 | 1200 | 1.1933        | 1.1119 | 0.868                    | -                         |
+| 0.208 | 1300 | 1.0102        | 0.9491 | 0.86                     | -                         |
+| 0.224 | 1400 | 0.8738        | 1.0682 | 0.87                     | -                         |
+| 0.24  | 1500 | 0.9482        | 0.8546 | 0.89                     | -                         |
+| 0.256 | 1600 | 0.6985        | 0.9136 | 0.88                     | -                         |
+| 0.272 | 1700 | 0.9908        | 0.9539 | 0.873                    | -                         |
+| 0.288 | 1800 | 1.0166        | 0.9277 | 0.878                    | -                         |
+| 0.304 | 1900 | 0.9441        | 0.9000 | 0.886                    | -                         |
+| 0.32  | 2000 | 0.8911        | 0.8364 | 0.891                    | -                         |
+| 0.336 | 2100 | 0.6746        | 0.8585 | 0.883                    | -                         |
+| 0.352 | 2200 | 0.7379        | 0.8332 | 0.888                    | -                         |
+| 0.368 | 2300 | 0.896         | 0.7617 | 0.89                     | -                         |
+| 0.384 | 2400 | 0.7901        | 0.7351 | 0.887                    | -                         |
+| 0.4   | 2500 | 0.811         | 0.7855 | 0.89                     | -                         |
+| 0.416 | 2600 | 0.6723        | 0.6756 | 0.899                    | -                         |
+| 0.432 | 2700 | 0.8839        | 0.7839 | 0.894                    | -                         |
+| 0.448 | 2800 | 0.9027        | 0.7319 | 0.903                    | -                         |
+| 0.464 | 2900 | 0.9276        | 0.7038 | 0.893                    | -                         |
+| 0.48  | 3000 | 0.7692        | 0.6653 | 0.903                    | -                         |
+| 0.496 | 3100 | 0.8044        | 0.6466 | 0.901                    | -                         |
+| 0.512 | 3200 | 0.6433        | 0.6145 | 0.906                    | -                         |
+| 0.528 | 3300 | 0.6642        | 0.5774 | 0.912                    | -                         |
+| 0.544 | 3400 | 0.5904        | 0.6054 | 0.907                    | -                         |
+| 0.56  | 3500 | 0.6378        | 0.5841 | 0.91                     | -                         |
+| 0.576 | 3600 | 0.5602        | 0.5444 | 0.921                    | -                         |
+| 0.592 | 3700 | 0.6436        | 0.5563 | 0.917                    | -                         |
+| 0.608 | 3800 | 0.588         | 0.5108 | 0.927                    | -                         |
+| 0.624 | 3900 | 0.5834        | 0.5059 | 0.925                    | -                         |
+| 0.64  | 4000 | 0.842         | 0.5217 | 0.929                    | -                         |
+| 0.656 | 4100 | 1.0995        | 0.5060 | 0.933                    | -                         |
+| 0.672 | 4200 | 0.9605        | 0.4842 | 0.928                    | -                         |
+| 0.688 | 4300 | 0.7811        | 0.4756 | 0.93                     | -                         |
+| 0.704 | 4400 | 0.7288        | 0.4650 | 0.938                    | -                         |
+| 0.72  | 4500 | 0.6636        | 0.4576 | 0.94                     | -                         |
+| 0.736 | 4600 | 0.7445        | 0.4552 | 0.934                    | -                         |
+| 0.752 | 4700 | 0.7687        | 0.4511 | 0.934                    | -                         |
+| 0.768 | 4800 | 0.7101        | 0.4446 | 0.936                    | -                         |
+| 0.784 | 4900 | 0.6586        | 0.4378 | 0.937                    | -                         |
+| 0.8   | 5000 | 0.789         | 0.4368 | 0.938                    | -                         |
+| 0.816 | 5100 | 0.6227        | 0.4344 | 0.941                    | -                         |
+| 0.832 | 5200 | 0.6994        | 0.4349 | 0.939                    | -                         |
+| 0.848 | 5300 | 0.687         | 0.4327 | 0.943                    | -                         |
+| 0.864 | 5400 | 0.76          | 0.4319 | 0.943                    | -                         |
+| 0.88  | 5500 | 0.6644        | 0.4323 | 0.941                    | -                         |
+| 0.896 | 5600 | 0.6535        | 0.4306 | 0.941                    | -                         |
+| 0.912 | 5700 | 0.7622        | 0.4289 | 0.941                    | -                         |
+| 0.928 | 5800 | 0.7053        | 0.4288 | 0.94                     | -                         |
+| 0.944 | 5900 | 0.8093        | 0.4289 | 0.94                     | -                         |
+| 0.96  | 6000 | 0.8658        | 0.4284 | 0.941                    | -                         |
+| 0.976 | 6100 | 0.7624        | 0.4283 | 0.941                    | -                         |
+| 0.992 | 6200 | 0.0003        | 0.4286 | 0.941                    | -                         |
+| 1.0   | 6250 | -             | -      | -                        | 0.947                     |
 ### Framework Versions
 }
 ```
+#### CachedMultipleNegativesRankingLoss
 ```bibtex
+@misc{gao2021scaling,
+    title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
+    author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
+    year={2021},
+    eprint={2101.06983},
     archivePrefix={arXiv},
+    primaryClass={cs.LG}
 }
 ```

final/config.json CHANGED

@@ -20,7 +20,7 @@
   "output_past": true,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
-  "torch_dtype": "float32",
   "transformers_version": "4.41.2",
   "type_vocab_size": 1,
   "use_cache": true,

   "output_past": true,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.41.2",
   "type_vocab_size": 1,
   "use_cache": true,

final/model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c35c8055250a476dc32b87c601f3abe4bc9aa87098c4d6976e79dc6094a3af3
-size 2239607176

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e81195ec7ed25e3bc167b118e7676c00ae9f6d52630d8d25e4cfa5974ddf530
+size 1119826072

runs/Jun03_21-23-40_ruche-gpu18.cluster/events.out.tfevents.1717442695.ruche-gpu18.cluster.1785.0 ADDED

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6202c48153d42fde3cf0082ca6c6a70f02b954c582d4054b5ec57f7ab8f5969a
+size 15963

runs/Jun03_21-55-04_ruche-gpu18.cluster/events.out.tfevents.1717444545.ruche-gpu18.cluster.20850.0 ADDED

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6d264d673c99450b336f2afe2e5b1eeabbe74cba8049adbc8aa526cd738e7de
+size 56493