leixa commited on
Commit
083457c
·
verified ·
1 Parent(s): cbbcfe7

Training in progress, step 52, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a41d5dc6f07b17a91d7523b564a479fa05deb815c989616b96e02696d8a97d3a
3
  size 150486964
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1501b3eb8d216af70c020ad20220edf6fefb2af2c5abde1695b7191ffd997993
3
  size 150486964
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef115ea703f8ce85f7370acd9e9ca9837347e723c8f2722bbfa4626aedfe995
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c87d052c61336e5f4eed49f8684dc218013ed8627df5aac854dc09f34705ac8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b06606e40b3f01ec7202acbfd1d0145e551b0842ff237681a6bdde0b316ef9b5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792ad29798cba378feffa861e2939c0c9205d97b89b46c0161cdef16a3d333a2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3979591836734694,
5
  "eval_steps": 13,
6
- "global_step": 39,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -130,6 +130,42 @@
130
  "eval_samples_per_second": 36.421,
131
  "eval_steps_per_second": 4.635,
132
  "step": 39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
  ],
135
  "logging_steps": 3,
@@ -149,7 +185,7 @@
149
  "attributes": {}
150
  }
151
  },
152
- "total_flos": 1.061364449673216e+16,
153
  "train_batch_size": 8,
154
  "trial_name": null,
155
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5306122448979592,
5
  "eval_steps": 13,
6
+ "global_step": 52,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
130
  "eval_samples_per_second": 36.421,
131
  "eval_steps_per_second": 4.635,
132
  "step": 39
133
+ },
134
+ {
135
+ "epoch": 0.42857142857142855,
136
+ "grad_norm": NaN,
137
+ "learning_rate": 8.765357330018056e-05,
138
+ "loss": 0.0,
139
+ "step": 42
140
+ },
141
+ {
142
+ "epoch": 0.45918367346938777,
143
+ "grad_norm": NaN,
144
+ "learning_rate": 8.535533905932738e-05,
145
+ "loss": 0.0,
146
+ "step": 45
147
+ },
148
+ {
149
+ "epoch": 0.4897959183673469,
150
+ "grad_norm": NaN,
151
+ "learning_rate": 8.289693629698564e-05,
152
+ "loss": 0.0,
153
+ "step": 48
154
+ },
155
+ {
156
+ "epoch": 0.5204081632653061,
157
+ "grad_norm": NaN,
158
+ "learning_rate": 8.0289502192041e-05,
159
+ "loss": 0.0,
160
+ "step": 51
161
+ },
162
+ {
163
+ "epoch": 0.5306122448979592,
164
+ "eval_loss": NaN,
165
+ "eval_runtime": 4.5333,
166
+ "eval_samples_per_second": 36.397,
167
+ "eval_steps_per_second": 4.632,
168
+ "step": 52
169
  }
170
  ],
171
  "logging_steps": 3,
 
185
  "attributes": {}
186
  }
187
  },
188
+ "total_flos": 1.415152599564288e+16,
189
  "train_batch_size": 8,
190
  "trial_name": null,
191
  "trial_params": null