eeeebbb2 committed
Commit
413ec73
1 Parent(s): 446f13c

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ff36cb5b13ab72618011d489e51482a332f05a5b45061c4b099deeb720ab1c62
+ oid sha256:34d07eb5380e96d5e791a3ecd753179c7acb95944feec557d098cf6b6d543f63
 size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e8763481db7be9f326264a81beedbff3e9bcdb07301124b357b633913f222735
+ oid sha256:99143540cf3ccfc9e2208eb18f6ff593f26140a559e60951363b707a1a477405
 size 640009682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:2582681deddb4536c67af0b6e0a9d2fa4c7e6d140569ce9907495f5c249a47ea
+ oid sha256:eef94ecc4f6620d80a5df52c9e2c8bb2dde70d4bc37bcb690ab095c59d937d5d
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:88e9fdfe5e814903c75696aac5773ec597bca67dd3b79be38ccdfe8aecf08070
+ oid sha256:4d0b679507c870d9fb220882cbb4c9c656af0671c31434c7d90079993b797f53
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:9e54566635f21f926f14ad429291289b3d44e248f4a6883045d7883e90d57ddd
+ oid sha256:e7d6ebdbab45feeede7eb7e01c400981df9bbd491ecd5477f14330cdbe7cf123
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ab6c89d6719982a35b4026234cc3804240728bedad333420d2706a553627ae65
+ oid sha256:a635cb715f9a83ae2e7e30ede64cf9b1b72d05ad6e78c7ca219107623d333b9f
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
+ oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
- "best_metric": 1.1073193550109863,
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
- "epoch": 0.012519953676171399,
+ "best_metric": 1.0611395835876465,
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
+ "epoch": 0.025039907352342797,
 "eval_steps": 25,
- "global_step": 25,
+ "global_step": 50,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -198,6 +198,189 @@
 "eval_samples_per_second": 12.522,
 "eval_steps_per_second": 3.256,
 "step": 25
+ },
+ {
+ "epoch": 0.013020751823218255,
+ "grad_norm": 6.128429889678955,
+ "learning_rate": 5.500000000000001e-05,
+ "loss": 20.2164,
+ "step": 26
+ },
+ {
+ "epoch": 0.01352154997026511,
+ "grad_norm": 7.410825729370117,
+ "learning_rate": 5.205685918464356e-05,
+ "loss": 20.2266,
+ "step": 27
+ },
+ {
+ "epoch": 0.014022348117311966,
+ "grad_norm": 4.50378942489624,
+ "learning_rate": 4.912632135009769e-05,
+ "loss": 17.0934,
+ "step": 28
+ },
+ {
+ "epoch": 0.014523146264358822,
+ "grad_norm": 4.2456464767456055,
+ "learning_rate": 4.6220935509274235e-05,
+ "loss": 17.7946,
+ "step": 29
+ },
+ {
+ "epoch": 0.015023944411405678,
+ "grad_norm": 3.301119327545166,
+ "learning_rate": 4.3353142970386564e-05,
+ "loss": 19.8457,
+ "step": 30
+ },
+ {
+ "epoch": 0.015524742558452534,
+ "grad_norm": 3.3931143283843994,
+ "learning_rate": 4.053522406135775e-05,
+ "loss": 17.4589,
+ "step": 31
+ },
+ {
+ "epoch": 0.01602554070549939,
+ "grad_norm": 3.908799409866333,
+ "learning_rate": 3.777924554357096e-05,
+ "loss": 17.9799,
+ "step": 32
+ },
+ {
+ "epoch": 0.016526338852546247,
+ "grad_norm": 4.308777809143066,
+ "learning_rate": 3.509700894014496e-05,
+ "loss": 18.2389,
+ "step": 33
+ },
+ {
+ "epoch": 0.0170271369995931,
+ "grad_norm": 5.028868198394775,
+ "learning_rate": 3.250000000000001e-05,
+ "loss": 18.8644,
+ "step": 34
+ },
+ {
+ "epoch": 0.01752793514663996,
+ "grad_norm": 5.518270492553711,
+ "learning_rate": 2.9999339514117912e-05,
+ "loss": 17.9908,
+ "step": 35
+ },
+ {
+ "epoch": 0.018028733293686813,
+ "grad_norm": 4.996335506439209,
+ "learning_rate": 2.760573569460757e-05,
+ "loss": 15.7478,
+ "step": 36
+ },
+ {
+ "epoch": 0.01852953144073367,
+ "grad_norm": 6.510356426239014,
+ "learning_rate": 2.53294383204969e-05,
+ "loss": 16.2257,
+ "step": 37
+ },
+ {
+ "epoch": 0.019030329587780524,
+ "grad_norm": 5.005591869354248,
+ "learning_rate": 2.3180194846605367e-05,
+ "loss": 21.489,
+ "step": 38
+ },
+ {
+ "epoch": 0.019531127734827382,
+ "grad_norm": 9.856449127197266,
+ "learning_rate": 2.1167208663446025e-05,
+ "loss": 21.2883,
+ "step": 39
+ },
+ {
+ "epoch": 0.020031925881874236,
+ "grad_norm": 3.623931646347046,
+ "learning_rate": 1.9299099686894423e-05,
+ "loss": 17.6629,
+ "step": 40
+ },
+ {
+ "epoch": 0.020532724028921093,
+ "grad_norm": 3.4106252193450928,
+ "learning_rate": 1.758386744638546e-05,
+ "loss": 15.4309,
+ "step": 41
+ },
+ {
+ "epoch": 0.021033522175967947,
+ "grad_norm": 4.296844005584717,
+ "learning_rate": 1.602885682970026e-05,
+ "loss": 17.6033,
+ "step": 42
+ },
+ {
+ "epoch": 0.021534320323014805,
+ "grad_norm": 4.559662342071533,
+ "learning_rate": 1.464072663102903e-05,
+ "loss": 16.8699,
+ "step": 43
+ },
+ {
+ "epoch": 0.02203511847006166,
+ "grad_norm": 4.427708625793457,
+ "learning_rate": 1.3425421036992098e-05,
+ "loss": 17.4621,
+ "step": 44
+ },
+ {
+ "epoch": 0.022535916617108517,
+ "grad_norm": 3.9432215690612793,
+ "learning_rate": 1.2388144172720251e-05,
+ "loss": 17.8905,
+ "step": 45
+ },
+ {
+ "epoch": 0.023036714764155374,
+ "grad_norm": 3.188026189804077,
+ "learning_rate": 1.1533337816991932e-05,
+ "loss": 17.3133,
+ "step": 46
+ },
+ {
+ "epoch": 0.02353751291120223,
+ "grad_norm": 3.24480938911438,
+ "learning_rate": 1.0864662381854632e-05,
+ "loss": 16.5617,
+ "step": 47
+ },
+ {
+ "epoch": 0.024038311058249086,
+ "grad_norm": 3.795017719268799,
+ "learning_rate": 1.0384981238178534e-05,
+ "loss": 16.9529,
+ "step": 48
+ },
+ {
+ "epoch": 0.02453910920529594,
+ "grad_norm": 3.4241626262664795,
+ "learning_rate": 1.0096348454262845e-05,
+ "loss": 14.9861,
+ "step": 49
+ },
+ {
+ "epoch": 0.025039907352342797,
+ "grad_norm": 5.961770057678223,
+ "learning_rate": 1e-05,
+ "loss": 17.1319,
+ "step": 50
+ },
+ {
+ "epoch": 0.025039907352342797,
+ "eval_loss": 1.0611395835876465,
+ "eval_runtime": 3.8308,
+ "eval_samples_per_second": 13.052,
+ "eval_steps_per_second": 3.394,
+ "step": 50
 }
 ],
 "logging_steps": 1,
@@ -221,12 +404,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 2.9582754301188506e+17,
+ "total_flos": 5.9050464574754e+17,
 "train_batch_size": 2,
 "trial_name": null,
 "trial_params": null
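For reference, a minimal sketch (assuming a local clone of this repository with the last-checkpoint/ directory present; the path is an assumption, not part of this commit) of reading trainer_state.json to inspect the fields updated here:

import json
from pathlib import Path

# Hypothetical local path; adjust to wherever the repository is checked out.
state_path = Path("last-checkpoint") / "trainer_state.json"

with state_path.open() as f:
    state = json.load(f)

# Fields changed by this commit (per the diff above: step 50, best eval loss ~1.061).
print("global_step:", state["global_step"])
print("best_metric:", state["best_metric"])
print("best_model_checkpoint:", state["best_model_checkpoint"])
print("should_training_stop:", state["stateful_callbacks"]["EarlyStoppingCallback"]["args"] if "stateful_callbacks" in state else "see trainer_state.json")

# Last few log_history entries (steps 26-50 were appended in this commit).
for entry in state["log_history"][-3:]:
    print(entry.get("step"), entry.get("loss", entry.get("eval_loss")))

The key names (global_step, best_metric, best_model_checkpoint, log_history) are taken directly from the diff; the stateful_callbacks lookup is an assumption about the surrounding JSON structure and may need adjusting to the actual file.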