ardaspear commited on
Commit
1669fd5
·
verified ·
1 Parent(s): 48598a1

Training in progress, step 170, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d49930c00528f90da24b6b608e7e80bf7ca0b3683118ac70ed158a02ed82355
3
  size 72396376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c925b87b8d7870ce14fd59fe02160bc0a9b8db123ed15f75cea43d25afd664bb
3
  size 72396376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc94d5b33f89c7421164351f9effdc622887a5194f3ac924e62ab147d4f6ce73
3
  size 37134420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50f8022e28ae5b2ba03c1fd7bc121b9ea02cb3e858280eee3e2ab2462cc80c3d
3
  size 37134420
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd821849c8a72b596e3334c93cfab0767818edfb3fd7124c4efcd8a0829df152
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8cfbc8e54b819f718872be40a285c49725dec3a1d4f07ec32ca0a5444a91520
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f676b917baa0a895f62d4ea12d985f19fe259f840a0ba6d41d00cece68314f5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21ce5519aba36efeb75a8dad39ab6bd85bd42d0ae24cbc1f5cfa5d96741b8bc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.038669320443559856,
5
  "eval_steps": 34,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -362,6 +362,91 @@
362
  "eval_samples_per_second": 35.248,
363
  "eval_steps_per_second": 4.409,
364
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  }
366
  ],
367
  "logging_steps": 3,
@@ -381,7 +466,7 @@
381
  "attributes": {}
382
  }
383
  },
384
- "total_flos": 4.355200239481651e+16,
385
  "train_batch_size": 8,
386
  "trial_name": null,
387
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.048336650554449814,
5
  "eval_steps": 34,
6
+ "global_step": 170,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
362
  "eval_samples_per_second": 35.248,
363
  "eval_steps_per_second": 4.409,
364
  "step": 136
365
+ },
366
+ {
367
+ "epoch": 0.03923798692067103,
368
+ "grad_norm": 0.4538515508174896,
369
+ "learning_rate": 3.784717029321922e-05,
370
+ "loss": 0.7585,
371
+ "step": 138
372
+ },
373
+ {
374
+ "epoch": 0.04009098663633779,
375
+ "grad_norm": 0.5923985838890076,
376
+ "learning_rate": 3.732519254757344e-05,
377
+ "loss": 0.7211,
378
+ "step": 141
379
+ },
380
+ {
381
+ "epoch": 0.04094398635200455,
382
+ "grad_norm": 0.36592337489128113,
383
+ "learning_rate": 3.679601723656205e-05,
384
+ "loss": 0.6604,
385
+ "step": 144
386
+ },
387
+ {
388
+ "epoch": 0.04179698606767131,
389
+ "grad_norm": 0.44677990674972534,
390
+ "learning_rate": 3.625995338366492e-05,
391
+ "loss": 0.7846,
392
+ "step": 147
393
+ },
394
+ {
395
+ "epoch": 0.04264998578333807,
396
+ "grad_norm": 0.4921860098838806,
397
+ "learning_rate": 3.5717314035076355e-05,
398
+ "loss": 0.6709,
399
+ "step": 150
400
+ },
401
+ {
402
+ "epoch": 0.04350298549900483,
403
+ "grad_norm": 0.5457685589790344,
404
+ "learning_rate": 3.516841607689501e-05,
405
+ "loss": 0.6974,
406
+ "step": 153
407
+ },
408
+ {
409
+ "epoch": 0.044355985214671594,
410
+ "grad_norm": 0.46299344301223755,
411
+ "learning_rate": 3.461358005007128e-05,
412
+ "loss": 0.6643,
413
+ "step": 156
414
+ },
415
+ {
416
+ "epoch": 0.045208984930338356,
417
+ "grad_norm": 0.44877251982688904,
418
+ "learning_rate": 3.405312996322042e-05,
419
+ "loss": 0.6035,
420
+ "step": 159
421
+ },
422
+ {
423
+ "epoch": 0.04606198464600512,
424
+ "grad_norm": 0.3635808527469635,
425
+ "learning_rate": 3.348739310341068e-05,
426
+ "loss": 0.6781,
427
+ "step": 162
428
+ },
429
+ {
430
+ "epoch": 0.04691498436167188,
431
+ "grad_norm": 0.4172018766403198,
432
+ "learning_rate": 3.2916699845036816e-05,
433
+ "loss": 0.6195,
434
+ "step": 165
435
+ },
436
+ {
437
+ "epoch": 0.04776798407733864,
438
+ "grad_norm": 0.36372795701026917,
439
+ "learning_rate": 3.234138345689077e-05,
440
+ "loss": 0.6599,
441
+ "step": 168
442
+ },
443
+ {
444
+ "epoch": 0.048336650554449814,
445
+ "eval_loss": 0.606797456741333,
446
+ "eval_runtime": 168.0542,
447
+ "eval_samples_per_second": 35.251,
448
+ "eval_steps_per_second": 4.409,
449
+ "step": 170
450
  }
451
  ],
452
  "logging_steps": 3,
 
466
  "attributes": {}
467
  }
468
  },
469
+ "total_flos": 5.444000299352064e+16,
470
  "train_batch_size": 8,
471
  "trial_name": null,
472
  "trial_params": null