fats-fme committed on
Commit
e3d4540
1 Parent(s): 246906e

Training in progress, step 63, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9ab19a8a8f8fa395c8a765767034493cd40b33b06b69f316a5bc79f2fb8a137
3
  size 30322120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f457b42d2924311d6385e3d082277d66bf56a13ea1aed2b2c66839023d5b13b
3
  size 30322120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0b3b4462eee314e4809a24235151a982dab5d604d062fa3e99d0e2c7a7446c2
3
  size 60837186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a77d569f3fabbf97a85eceb12a6f12b034329b77daf77c165c240e8e876d1628
3
  size 60837186
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc69ab1dcca216771161e0b91f7d26d9c90fd7c25977be460383903db704a06
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1658d632652adcbe63cac75e96f1db776eefb7599c6fef183ad994d5bd66c3
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:975fb42b56b65e8821d845db3ce30cfc648ea90ad5ce64077e43a4d086f034fc
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4406e8ac0eff4b9d2b3ffcfdafd38a3be4e1f654b02ae8e38b4876194d266d40
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d61d85a65f01744d9a673e2b200c99e2454febba890b00c501a234f6e9909e25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:badf8dd7fd843c03f3dfa379f5a62cbdac918ff18b84343f94ed7c04cd128a3f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.758893280632411,
5
  "eval_steps": 16,
6
- "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -375,6 +375,111 @@
375
  "eval_samples_per_second": 13.211,
376
  "eval_steps_per_second": 3.334,
377
  "step": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  }
379
  ],
380
  "logging_steps": 1,
@@ -389,12 +494,12 @@
389
  "should_evaluate": false,
390
  "should_log": false,
391
  "should_save": true,
392
- "should_training_stop": false
393
  },
394
  "attributes": {}
395
  }
396
  },
397
- "total_flos": 7214050224111616.0,
398
  "train_batch_size": 2,
399
  "trial_name": null,
400
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9960474308300395,
5
  "eval_steps": 16,
6
+ "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
375
  "eval_samples_per_second": 13.211,
376
  "eval_steps_per_second": 3.334,
377
  "step": 48
378
+ },
379
+ {
380
+ "epoch": 0.7747035573122529,
381
+ "grad_norm": 0.8376194834709167,
382
+ "learning_rate": 9.8e-05,
383
+ "loss": 0.1525,
384
+ "step": 49
385
+ },
386
+ {
387
+ "epoch": 0.7905138339920948,
388
+ "grad_norm": 0.18561908602714539,
389
+ "learning_rate": 0.0001,
390
+ "loss": 0.0172,
391
+ "step": 50
392
+ },
393
+ {
394
+ "epoch": 0.8063241106719368,
395
+ "grad_norm": 0.31320735812187195,
396
+ "learning_rate": 9.85470908713026e-05,
397
+ "loss": 0.0038,
398
+ "step": 51
399
+ },
400
+ {
401
+ "epoch": 0.8221343873517787,
402
+ "grad_norm": 0.4068452715873718,
403
+ "learning_rate": 9.42728012826605e-05,
404
+ "loss": 0.0178,
405
+ "step": 52
406
+ },
407
+ {
408
+ "epoch": 0.8379446640316206,
409
+ "grad_norm": 0.16684125363826752,
410
+ "learning_rate": 8.742553740855506e-05,
411
+ "loss": 0.0087,
412
+ "step": 53
413
+ },
414
+ {
415
+ "epoch": 0.8537549407114624,
416
+ "grad_norm": 0.05175252631306648,
417
+ "learning_rate": 7.840323733655778e-05,
418
+ "loss": 0.0011,
419
+ "step": 54
420
+ },
421
+ {
422
+ "epoch": 0.8695652173913043,
423
+ "grad_norm": 0.018029799684882164,
424
+ "learning_rate": 6.773024435212678e-05,
425
+ "loss": 0.0006,
426
+ "step": 55
427
+ },
428
+ {
429
+ "epoch": 0.8853754940711462,
430
+ "grad_norm": 0.03412799909710884,
431
+ "learning_rate": 5.602683401276615e-05,
432
+ "loss": 0.0009,
433
+ "step": 56
434
+ },
435
+ {
436
+ "epoch": 0.9011857707509882,
437
+ "grad_norm": 0.06215568631887436,
438
+ "learning_rate": 4.397316598723385e-05,
439
+ "loss": 0.0011,
440
+ "step": 57
441
+ },
442
+ {
443
+ "epoch": 0.9169960474308301,
444
+ "grad_norm": 0.05936681851744652,
445
+ "learning_rate": 3.226975564787322e-05,
446
+ "loss": 0.0011,
447
+ "step": 58
448
+ },
449
+ {
450
+ "epoch": 0.932806324110672,
451
+ "grad_norm": 0.045637015253305435,
452
+ "learning_rate": 2.1596762663442218e-05,
453
+ "loss": 0.0011,
454
+ "step": 59
455
+ },
456
+ {
457
+ "epoch": 0.9486166007905138,
458
+ "grad_norm": 0.03515447676181793,
459
+ "learning_rate": 1.257446259144494e-05,
460
+ "loss": 0.001,
461
+ "step": 60
462
+ },
463
+ {
464
+ "epoch": 0.9644268774703557,
465
+ "grad_norm": 0.04813205078244209,
466
+ "learning_rate": 5.727198717339511e-06,
467
+ "loss": 0.0011,
468
+ "step": 61
469
+ },
470
+ {
471
+ "epoch": 0.9802371541501976,
472
+ "grad_norm": 0.05862165987491608,
473
+ "learning_rate": 1.4529091286973995e-06,
474
+ "loss": 0.0012,
475
+ "step": 62
476
+ },
477
+ {
478
+ "epoch": 0.9960474308300395,
479
+ "grad_norm": 0.07167425751686096,
480
+ "learning_rate": 0.0,
481
+ "loss": 0.0096,
482
+ "step": 63
483
  }
484
  ],
485
  "logging_steps": 1,
 
494
  "should_evaluate": false,
495
  "should_log": false,
496
  "should_save": true,
497
+ "should_training_stop": true
498
  },
499
  "attributes": {}
500
  }
501
  },
502
+ "total_flos": 9264114014617600.0,
503
  "train_batch_size": 2,
504
  "trial_name": null,
505
  "trial_params": null