diaenra commited on
Commit
0094cd7
·
verified ·
1 Parent(s): 9851bb8

Training in progress, step 494, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fd669b36c142db3b5ac9018afcce2cd8dcdd67a8f0aae5ba8ddc72a8bca0121
3
  size 4102239984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff17ef401d6e6ad527e825d1eafb7c8fd1e90b6f724e50fa522778c68104c9c
3
  size 4102239984
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:405b57b437f7310eb637920c1f9993e3c54797e99626d6cb1ee7955789ea5cae
3
  size 8204830696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:343c6b4955f08eef048f3d7f9a6f80d0d448a7ab6cf7b58f095c7bd4723bdeb2
3
  size 8204830696
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8a5d538bb0eaa69aecbaf9562eddfe3aac9c94b0e90b5825e6721ab12978dc1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad7d87d2cae58a23d1b2313ff708823206e2a45a1d1364fc765c5d30d52892d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50bf10ab16acdf754678aad1e5a7a0f326946162adfbb22565a648cfbb9b4bdb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca23e5c877c9d2e3eb941aadb9b0bac9e08da5e990c782ceb720152ffa5e60a9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.966144517433047,
5
  "eval_steps": 500,
6
- "global_step": 478,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3353,6 +3353,118 @@
3353
  "learning_rate": 4.0634802034176244e-07,
3354
  "loss": 1.3784,
3355
  "step": 478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3356
  }
3357
  ],
3358
  "logging_steps": 1,
@@ -3367,12 +3479,12 @@
3367
  "should_evaluate": false,
3368
  "should_log": false,
3369
  "should_save": true,
3370
- "should_training_stop": false
3371
  },
3372
  "attributes": {}
3373
  }
3374
  },
3375
- "total_flos": 2.1989875311968256e+17,
3376
  "train_batch_size": 4,
3377
  "trial_name": null,
3378
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9984840828701365,
5
  "eval_steps": 500,
6
+ "global_step": 494,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3353
  "learning_rate": 4.0634802034176244e-07,
3354
  "loss": 1.3784,
3355
  "step": 478
3356
+ },
3357
+ {
3358
+ "epoch": 0.9681657402728651,
3359
+ "grad_norm": 5.375519275665283,
3360
+ "learning_rate": 3.572004926893413e-07,
3361
+ "loss": 1.5642,
3362
+ "step": 479
3363
+ },
3364
+ {
3365
+ "epoch": 0.9701869631126832,
3366
+ "grad_norm": 5.717057704925537,
3367
+ "learning_rate": 3.112091414176621e-07,
3368
+ "loss": 1.7185,
3369
+ "step": 480
3370
+ },
3371
+ {
3372
+ "epoch": 0.9722081859525012,
3373
+ "grad_norm": 4.669751167297363,
3374
+ "learning_rate": 2.6837689055232426e-07,
3375
+ "loss": 1.2948,
3376
+ "step": 481
3377
+ },
3378
+ {
3379
+ "epoch": 0.9742294087923193,
3380
+ "grad_norm": 4.622890949249268,
3381
+ "learning_rate": 2.287064632705005e-07,
3382
+ "loss": 1.1733,
3383
+ "step": 482
3384
+ },
3385
+ {
3386
+ "epoch": 0.9762506316321374,
3387
+ "grad_norm": 4.939849376678467,
3388
+ "learning_rate": 1.9220038172780842e-07,
3389
+ "loss": 1.3322,
3390
+ "step": 483
3391
+ },
3392
+ {
3393
+ "epoch": 0.9782718544719555,
3394
+ "grad_norm": 5.243642330169678,
3395
+ "learning_rate": 1.588609668979446e-07,
3396
+ "loss": 1.2098,
3397
+ "step": 484
3398
+ },
3399
+ {
3400
+ "epoch": 0.9802930773117736,
3401
+ "grad_norm": 5.232309818267822,
3402
+ "learning_rate": 1.286903384251581e-07,
3403
+ "loss": 1.2723,
3404
+ "step": 485
3405
+ },
3406
+ {
3407
+ "epoch": 0.9823143001515917,
3408
+ "grad_norm": 6.266340732574463,
3409
+ "learning_rate": 1.0169041448943039e-07,
3410
+ "loss": 1.2856,
3411
+ "step": 486
3412
+ },
3413
+ {
3414
+ "epoch": 0.9843355229914098,
3415
+ "grad_norm": 6.7853851318359375,
3416
+ "learning_rate": 7.78629116845786e-08,
3417
+ "loss": 1.2888,
3418
+ "step": 487
3419
+ },
3420
+ {
3421
+ "epoch": 0.9863567458312279,
3422
+ "grad_norm": 5.390272617340088,
3423
+ "learning_rate": 5.7209344909076036e-08,
3424
+ "loss": 1.271,
3425
+ "step": 488
3426
+ },
3427
+ {
3428
+ "epoch": 0.988377968671046,
3429
+ "grad_norm": 5.515243053436279,
3430
+ "learning_rate": 3.973102726976819e-08,
3431
+ "loss": 1.1683,
3432
+ "step": 489
3433
+ },
3434
+ {
3435
+ "epoch": 0.9903991915108641,
3436
+ "grad_norm": 5.492679595947266,
3437
+ "learning_rate": 2.542906999836725e-08,
3438
+ "loss": 1.2462,
3439
+ "step": 490
3440
+ },
3441
+ {
3442
+ "epoch": 0.9924204143506822,
3443
+ "grad_norm": 5.038405418395996,
3444
+ "learning_rate": 1.4304382380819769e-08,
3445
+ "loss": 1.1106,
3446
+ "step": 491
3447
+ },
3448
+ {
3449
+ "epoch": 0.9944416371905003,
3450
+ "grad_norm": 5.300425052642822,
3451
+ "learning_rate": 6.357671699486201e-09,
3452
+ "loss": 1.1875,
3453
+ "step": 492
3454
+ },
3455
+ {
3456
+ "epoch": 0.9964628600303184,
3457
+ "grad_norm": 5.133315086364746,
3458
+ "learning_rate": 1.5894431881657845e-09,
3459
+ "loss": 1.1627,
3460
+ "step": 493
3461
+ },
3462
+ {
3463
+ "epoch": 0.9984840828701365,
3464
+ "grad_norm": 5.80331563949585,
3465
+ "learning_rate": 0.0,
3466
+ "loss": 1.1667,
3467
+ "step": 494
3468
  }
3469
  ],
3470
  "logging_steps": 1,
 
3479
  "should_evaluate": false,
3480
  "should_log": false,
3481
  "should_save": true,
3482
+ "should_training_stop": true
3483
  },
3484
  "attributes": {}
3485
  }
3486
  },
3487
+ "total_flos": 2.2725168948314112e+17,
3488
  "train_batch_size": 4,
3489
  "trial_name": null,
3490
  "trial_params": null