Model save
Browse files
- README.md +1 -1
- all_results.json +7 -7
- config.json +1 -1
- train_results.json +7 -7
- trainer_state.json +0 -0
README.md
CHANGED
@@ -35,7 +35,7 @@ This model was trained with SFT.
|
|
35 |
|
36 |
- TRL: 0.12.1
|
37 |
- Transformers: 4.46.3
|
38 |
-
- Pytorch: 2.
|
39 |
- Datasets: 3.1.0
|
40 |
- Tokenizers: 0.20.3
|
41 |
|
|
|
35 |
|
36 |
- TRL: 0.12.1
|
37 |
- Transformers: 4.46.3
|
38 |
+
- Pytorch: 2.5.1
|
39 |
- Datasets: 3.1.0
|
40 |
- Tokenizers: 0.20.3
|
41 |
|
all_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"total_flos":
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"total_flos": 4.236515906227798e+19,
|
4 |
+
"train_loss": 0.23586479178022945,
|
5 |
+
"train_runtime": 155185.7869,
|
6 |
+
"train_samples": 353068,
|
7 |
+
"train_samples_per_second": 11.376,
|
8 |
+
"train_steps_per_second": 0.356
|
9 |
}
|
config.json
CHANGED
@@ -31,6 +31,6 @@
|
|
31 |
"tie_word_embeddings": true,
|
32 |
"torch_dtype": "bfloat16",
|
33 |
"transformers_version": "4.46.3",
|
34 |
-
"use_cache":
|
35 |
"vocab_size": 128256
|
36 |
}
|
|
|
31 |
"tie_word_embeddings": true,
|
32 |
"torch_dtype": "bfloat16",
|
33 |
"transformers_version": "4.46.3",
|
34 |
+
"use_cache": true,
|
35 |
"vocab_size": 128256
|
36 |
}
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"total_flos":
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"total_flos": 4.236515906227798e+19,
|
4 |
+
"train_loss": 0.23586479178022945,
|
5 |
+
"train_runtime": 155185.7869,
|
6 |
+
"train_samples": 353068,
|
7 |
+
"train_samples_per_second": 11.376,
|
8 |
+
"train_steps_per_second": 0.356
|
9 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|