Femboyuwu2000 commited on
Commit
1fd787f
1 Parent(s): 9952423

Training in progress, step 3960, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de0aeaf21dfa25596d3e83291fdac7d22780924777e6b26386fda19e60f85953
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7889d14a2444a830b872f5e86d561143f2cbab0ea5ba81dbb788ecd497b6dc12
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adef023b0b0184db66b469a4b3d5b9622ecbb07244f5c52c41d3671dd322e40a
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef7e9ed4a471e46548d0b25542268f6f7e42b296dc835d21a4629297b643f8a
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31209e19cd1e43f15fe297600ccd65154a6150b1671600037426d3845125ba25
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94bb4d16678255f69acd3c6ad1f7e28dae58deeccef6c73a9ed3860161dd1747
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c28f06383bf072aeed87a869c461b8eda46a4fd1dbe3f64038b5a01c478bcad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa728ff8dd68ad393219e062ef8c6a1fc7dc031ee553ad652965f2249179521
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3152,
5
  "eval_steps": 500,
6
- "global_step": 3940,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1386,6 +1386,13 @@
1386
  "learning_rate": 2.7858899370977123e-05,
1387
  "loss": 3.589,
1388
  "step": 3940
 
 
 
 
 
 
 
1389
  }
1390
  ],
1391
  "logging_steps": 20,
@@ -1393,7 +1400,7 @@
1393
  "num_input_tokens_seen": 0,
1394
  "num_train_epochs": 2,
1395
  "save_steps": 20,
1396
- "total_flos": 9303592776990720.0,
1397
  "train_batch_size": 8,
1398
  "trial_name": null,
1399
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3168,
5
  "eval_steps": 500,
6
+ "global_step": 3960,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1386
  "learning_rate": 2.7858899370977123e-05,
1387
  "loss": 3.589,
1388
  "step": 3940
1389
+ },
1390
+ {
1391
+ "epoch": 0.32,
1392
+ "grad_norm": 48.60535430908203,
1393
+ "learning_rate": 2.783381823144452e-05,
1394
+ "loss": 3.6398,
1395
+ "step": 3960
1396
  }
1397
  ],
1398
  "logging_steps": 20,
 
1400
  "num_input_tokens_seen": 0,
1401
  "num_train_epochs": 2,
1402
  "save_steps": 20,
1403
+ "total_flos": 9350734873362432.0,
1404
  "train_batch_size": 8,
1405
  "trial_name": null,
1406
  "trial_params": null