hhschu committed on
Commit
dd62e94
·
verified ·
1 Parent(s): 5fb6611

Model save

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +7 -7
  3. config.json +1 -1
  4. train_results.json +7 -7
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -35,7 +35,7 @@ This model was trained with SFT.
35
 
36
  - TRL: 0.12.1
37
  - Transformers: 4.46.3
38
- - Pytorch: 2.4.1
39
  - Datasets: 3.1.0
40
  - Tokenizers: 0.20.3
41
 
 
35
 
36
  - TRL: 0.12.1
37
  - Transformers: 4.46.3
38
+ - Pytorch: 2.5.1
39
  - Datasets: 3.1.0
40
  - Tokenizers: 0.20.3
41
 
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 1352998748160000.0,
4
- "train_loss": 0.6282720804214478,
5
- "train_runtime": 247.2557,
6
- "train_samples": 100,
7
- "train_samples_per_second": 0.04,
8
- "train_steps_per_second": 0.04
9
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 4.236515906227798e+19,
4
+ "train_loss": 0.23586479178022945,
5
+ "train_runtime": 155185.7869,
6
+ "train_samples": 353068,
7
+ "train_samples_per_second": 11.376,
8
+ "train_steps_per_second": 0.356
9
  }
config.json CHANGED
@@ -31,6 +31,6 @@
31
  "tie_word_embeddings": true,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.46.3",
34
- "use_cache": false,
35
  "vocab_size": 128256
36
  }
 
31
  "tie_word_embeddings": true,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.46.3",
34
+ "use_cache": true,
35
  "vocab_size": 128256
36
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 1352998748160000.0,
4
- "train_loss": 0.6282720804214478,
5
- "train_runtime": 247.2557,
6
- "train_samples": 100,
7
- "train_samples_per_second": 0.04,
8
- "train_steps_per_second": 0.04
9
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 4.236515906227798e+19,
4
+ "train_loss": 0.23586479178022945,
5
+ "train_runtime": 155185.7869,
6
+ "train_samples": 353068,
7
+ "train_samples_per_second": 11.376,
8
+ "train_steps_per_second": 0.356
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff