leixa committed on
Commit
fd353a2
·
verified ·
1 Parent(s): 5d7db14

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d44bc5be1e3118d2e77ce82df05169f0d9aabeb7f36133abc9476b7e106edf50
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d0a8a3ec4f34418a88a656cab64ce468819986166088766eac291b51c015350
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d59946c8391924c55bdbeee729207f19b6103e226220bb310247dcc3cc9d9cb
3
  size 509176980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5204c920a9c28a65017b7dc19a74b014324204b35ad86defdf6ec3b9bfab5a3b
3
  size 509176980
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e89a5182bb97955f9b45a8cc3c3bf35765c6a725efad38ec6b58f6ba203942c8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026b710e7a787e8533a7b53109192c1ec5bf78162ec26798b66409b9a5f88889
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dac856699362054adec05f5effdce5ef83a8d5422ff5e800f6e464aadfae0e0f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cb01d4f0da8d959e9ca5a4773566e24c639c6b8f18e1285757275e715c7b05f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.346320346320346,
5
  "eval_steps": 15,
6
- "global_step": 135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -402,6 +402,49 @@
402
  "eval_samples_per_second": 4.437,
403
  "eval_steps_per_second": 0.595,
404
  "step": 135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
406
  ],
407
  "logging_steps": 3,
@@ -421,7 +464,7 @@
421
  "attributes": {}
422
  }
423
  },
424
- "total_flos": 6.94240387572695e+17,
425
  "train_batch_size": 8,
426
  "trial_name": null,
427
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.606060606060606,
5
  "eval_steps": 15,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
402
  "eval_samples_per_second": 4.437,
403
  "eval_steps_per_second": 0.595,
404
  "step": 135
405
+ },
406
+ {
407
+ "epoch": 2.398268398268398,
408
+ "grad_norm": 1.1166489124298096,
409
+ "learning_rate": 1.0951380325872979e-05,
410
+ "loss": 1.5943,
411
+ "step": 138
412
+ },
413
+ {
414
+ "epoch": 2.45021645021645,
415
+ "grad_norm": 1.1581447124481201,
416
+ "learning_rate": 9.212000874196953e-06,
417
+ "loss": 1.7557,
418
+ "step": 141
419
+ },
420
+ {
421
+ "epoch": 2.502164502164502,
422
+ "grad_norm": 1.1575648784637451,
423
+ "learning_rate": 7.60894734597476e-06,
424
+ "loss": 1.6507,
425
+ "step": 144
426
+ },
427
+ {
428
+ "epoch": 2.554112554112554,
429
+ "grad_norm": 1.119139313697815,
430
+ "learning_rate": 6.147577634637414e-06,
431
+ "loss": 1.7355,
432
+ "step": 147
433
+ },
434
+ {
435
+ "epoch": 2.606060606060606,
436
+ "grad_norm": 1.2103753089904785,
437
+ "learning_rate": 4.832776083120982e-06,
438
+ "loss": 1.6898,
439
+ "step": 150
440
+ },
441
+ {
442
+ "epoch": 2.606060606060606,
443
+ "eval_loss": 0.6833479404449463,
444
+ "eval_runtime": 21.8503,
445
+ "eval_samples_per_second": 4.439,
446
+ "eval_steps_per_second": 0.595,
447
+ "step": 150
448
  }
449
  ],
450
  "logging_steps": 3,
 
464
  "attributes": {}
465
  }
466
  },
467
+ "total_flos": 7.70877313473577e+17,
468
  "train_batch_size": 8,
469
  "trial_name": null,
470
  "trial_params": null