kooff11 committed on
Commit
ef65dc6
·
verified ·
1 Parent(s): ca6dc1e

Training in progress, step 21, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9bafac06c2c177b6873285cba9faa63a0aad0c7fde76726cc9c9ef561423470
3
  size 82460660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d66c403cdd318e9ea464acc3be84788a49b39f3efcac2737e1c82743e5fb752
3
  size 82460660
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:297e7ddbc295a07cecddf760a69f80e7807fb90e55e9ae40a8c01ff1aacc7519
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a29dde21dfe173516419b977db3c1775d1cd4ba8f6bd6e1a59919a4f0685ef
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3fa8e33d36e4355c9c88316639a14b52060c5a44fe868e2fd61ab111f6ed180
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cd3306f67ab1d184a941a93922d03608c525054b65e45368eab572e7e3d4940
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f1660560e734c40e5470cca2d00d4f738c021882bc0c1304da944a20144bee2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eef01f601d0942e2858e183d109e520edd8d27fcbc4627e3dd5104e8484712
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5185185185185185,
5
  "eval_steps": 7,
6
- "global_step": 14,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -129,6 +129,63 @@
129
  "eval_samples_per_second": 10.061,
130
  "eval_steps_per_second": 2.543,
131
  "step": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  }
133
  ],
134
  "logging_steps": 1,
@@ -148,7 +205,7 @@
148
  "attributes": {}
149
  }
150
  },
151
- "total_flos": 3.1316932709423514e+17,
152
  "train_batch_size": 2,
153
  "trial_name": null,
154
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7777777777777778,
5
  "eval_steps": 7,
6
+ "global_step": 21,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
129
  "eval_samples_per_second": 10.061,
130
  "eval_steps_per_second": 2.543,
131
  "step": 14
132
+ },
133
+ {
134
+ "epoch": 0.5555555555555556,
135
+ "grad_norm": NaN,
136
+ "learning_rate": 4.6860474023534335e-05,
137
+ "loss": 0.0,
138
+ "step": 15
139
+ },
140
+ {
141
+ "epoch": 0.5925925925925926,
142
+ "grad_norm": NaN,
143
+ "learning_rate": 4.063093427071376e-05,
144
+ "loss": 0.0,
145
+ "step": 16
146
+ },
147
+ {
148
+ "epoch": 0.6296296296296297,
149
+ "grad_norm": NaN,
150
+ "learning_rate": 3.4549150281252636e-05,
151
+ "loss": 0.0,
152
+ "step": 17
153
+ },
154
+ {
155
+ "epoch": 0.6666666666666666,
156
+ "grad_norm": NaN,
157
+ "learning_rate": 2.8711035421746367e-05,
158
+ "loss": 0.0,
159
+ "step": 18
160
+ },
161
+ {
162
+ "epoch": 0.7037037037037037,
163
+ "grad_norm": NaN,
164
+ "learning_rate": 2.3208660251050158e-05,
165
+ "loss": 0.0,
166
+ "step": 19
167
+ },
168
+ {
169
+ "epoch": 0.7407407407407407,
170
+ "grad_norm": NaN,
171
+ "learning_rate": 1.8128800512565513e-05,
172
+ "loss": 0.0,
173
+ "step": 20
174
+ },
175
+ {
176
+ "epoch": 0.7777777777777778,
177
+ "grad_norm": NaN,
178
+ "learning_rate": 1.3551568628929434e-05,
179
+ "loss": 0.0,
180
+ "step": 21
181
+ },
182
+ {
183
+ "epoch": 0.7777777777777778,
184
+ "eval_loss": NaN,
185
+ "eval_runtime": 18.0827,
186
+ "eval_samples_per_second": 10.065,
187
+ "eval_steps_per_second": 2.544,
188
+ "step": 21
189
  }
190
  ],
191
  "logging_steps": 1,
 
205
  "attributes": {}
206
  }
207
  },
208
+ "total_flos": 4.697539906413527e+17,
209
  "train_batch_size": 2,
210
  "trial_name": null,
211
  "trial_params": null