Model save
Browse files
- README.md +7 -7
- config.json +1 -1
- generation_config.json +2 -1
README.md
CHANGED
@@ -47,16 +47,16 @@ The following hyperparameters were used during training:
|
|
47 |
|
48 |
### Training results
|
49 |
|
50 |
-
| Training Loss | Epoch
|
51 |
-
|
52 |
-
| 0.0 | 0
|
53 |
-
| 0.0 |
|
54 |
-
| 0.0 |
|
55 |
|
56 |
|
57 |
### Framework versions
|
58 |
|
59 |
-
- Transformers 4.41.
|
60 |
- Pytorch 2.3.0+cu121
|
61 |
-
- Datasets 2.
|
62 |
- Tokenizers 0.19.1
|
|
|
47 |
|
48 |
### Training results
|
49 |
|
50 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
51 |
+
|:-------------:|:-----:|:----:|:---------------:|
|
52 |
+
| 0.0 | 1.0 | 130 | nan |
|
53 |
+
| 0.0 | 2.0 | 260 | nan |
|
54 |
+
| 0.0 | 3.0 | 390 | nan |
|
55 |
|
56 |
|
57 |
### Framework versions
|
58 |
|
59 |
+
- Transformers 4.41.2
|
60 |
- Pytorch 2.3.0+cu121
|
61 |
+
- Datasets 2.20.0
|
62 |
- Tokenizers 0.19.1
|
config.json
CHANGED
@@ -27,7 +27,7 @@
|
|
27 |
"relative_attention_num_buckets": 32,
|
28 |
"tie_word_embeddings": false,
|
29 |
"torch_dtype": "float32",
|
30 |
-
"transformers_version": "4.41.
|
31 |
"use_cache": true,
|
32 |
"vocab_size": 32128
|
33 |
}
|
|
|
27 |
"relative_attention_num_buckets": 32,
|
28 |
"tie_word_embeddings": false,
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.41.2",
|
31 |
"use_cache": true,
|
32 |
"vocab_size": 32128
|
33 |
}
|
generation_config.json
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
|
|
2 |
"decoder_start_token_id": 0,
|
3 |
"eos_token_id": 1,
|
4 |
"pad_token_id": 0,
|
5 |
-
"transformers_version": "4.41.
|
6 |
}
|
|
|
1 |
{
|
2 |
+
"_from_model_config": true,
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.41.2"
|
7 |
}
|