Training in progress, step 494, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4102239984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fff17ef401d6e6ad527e825d1eafb7c8fd1e90b6f724e50fa522778c68104c9c
|
3 |
size 4102239984
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8204830696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:343c6b4955f08eef048f3d7f9a6f80d0d448a7ab6cf7b58f095c7bd4723bdeb2
|
3 |
size 8204830696
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aad7d87d2cae58a23d1b2313ff708823206e2a45a1d1364fc765c5d30d52892d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca23e5c877c9d2e3eb941aadb9b0bac9e08da5e990c782ceb720152ffa5e60a9
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3353,6 +3353,118 @@
|
|
3353 |
"learning_rate": 4.0634802034176244e-07,
|
3354 |
"loss": 1.3784,
|
3355 |
"step": 478
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3356 |
}
|
3357 |
],
|
3358 |
"logging_steps": 1,
|
@@ -3367,12 +3479,12 @@
|
|
3367 |
"should_evaluate": false,
|
3368 |
"should_log": false,
|
3369 |
"should_save": true,
|
3370 |
-
"should_training_stop":
|
3371 |
},
|
3372 |
"attributes": {}
|
3373 |
}
|
3374 |
},
|
3375 |
-
"total_flos": 2.
|
3376 |
"train_batch_size": 4,
|
3377 |
"trial_name": null,
|
3378 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9984840828701365,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 494,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3353 |
"learning_rate": 4.0634802034176244e-07,
|
3354 |
"loss": 1.3784,
|
3355 |
"step": 478
|
3356 |
+
},
|
3357 |
+
{
|
3358 |
+
"epoch": 0.9681657402728651,
|
3359 |
+
"grad_norm": 5.375519275665283,
|
3360 |
+
"learning_rate": 3.572004926893413e-07,
|
3361 |
+
"loss": 1.5642,
|
3362 |
+
"step": 479
|
3363 |
+
},
|
3364 |
+
{
|
3365 |
+
"epoch": 0.9701869631126832,
|
3366 |
+
"grad_norm": 5.717057704925537,
|
3367 |
+
"learning_rate": 3.112091414176621e-07,
|
3368 |
+
"loss": 1.7185,
|
3369 |
+
"step": 480
|
3370 |
+
},
|
3371 |
+
{
|
3372 |
+
"epoch": 0.9722081859525012,
|
3373 |
+
"grad_norm": 4.669751167297363,
|
3374 |
+
"learning_rate": 2.6837689055232426e-07,
|
3375 |
+
"loss": 1.2948,
|
3376 |
+
"step": 481
|
3377 |
+
},
|
3378 |
+
{
|
3379 |
+
"epoch": 0.9742294087923193,
|
3380 |
+
"grad_norm": 4.622890949249268,
|
3381 |
+
"learning_rate": 2.287064632705005e-07,
|
3382 |
+
"loss": 1.1733,
|
3383 |
+
"step": 482
|
3384 |
+
},
|
3385 |
+
{
|
3386 |
+
"epoch": 0.9762506316321374,
|
3387 |
+
"grad_norm": 4.939849376678467,
|
3388 |
+
"learning_rate": 1.9220038172780842e-07,
|
3389 |
+
"loss": 1.3322,
|
3390 |
+
"step": 483
|
3391 |
+
},
|
3392 |
+
{
|
3393 |
+
"epoch": 0.9782718544719555,
|
3394 |
+
"grad_norm": 5.243642330169678,
|
3395 |
+
"learning_rate": 1.588609668979446e-07,
|
3396 |
+
"loss": 1.2098,
|
3397 |
+
"step": 484
|
3398 |
+
},
|
3399 |
+
{
|
3400 |
+
"epoch": 0.9802930773117736,
|
3401 |
+
"grad_norm": 5.232309818267822,
|
3402 |
+
"learning_rate": 1.286903384251581e-07,
|
3403 |
+
"loss": 1.2723,
|
3404 |
+
"step": 485
|
3405 |
+
},
|
3406 |
+
{
|
3407 |
+
"epoch": 0.9823143001515917,
|
3408 |
+
"grad_norm": 6.266340732574463,
|
3409 |
+
"learning_rate": 1.0169041448943039e-07,
|
3410 |
+
"loss": 1.2856,
|
3411 |
+
"step": 486
|
3412 |
+
},
|
3413 |
+
{
|
3414 |
+
"epoch": 0.9843355229914098,
|
3415 |
+
"grad_norm": 6.7853851318359375,
|
3416 |
+
"learning_rate": 7.78629116845786e-08,
|
3417 |
+
"loss": 1.2888,
|
3418 |
+
"step": 487
|
3419 |
+
},
|
3420 |
+
{
|
3421 |
+
"epoch": 0.9863567458312279,
|
3422 |
+
"grad_norm": 5.390272617340088,
|
3423 |
+
"learning_rate": 5.7209344909076036e-08,
|
3424 |
+
"loss": 1.271,
|
3425 |
+
"step": 488
|
3426 |
+
},
|
3427 |
+
{
|
3428 |
+
"epoch": 0.988377968671046,
|
3429 |
+
"grad_norm": 5.515243053436279,
|
3430 |
+
"learning_rate": 3.973102726976819e-08,
|
3431 |
+
"loss": 1.1683,
|
3432 |
+
"step": 489
|
3433 |
+
},
|
3434 |
+
{
|
3435 |
+
"epoch": 0.9903991915108641,
|
3436 |
+
"grad_norm": 5.492679595947266,
|
3437 |
+
"learning_rate": 2.542906999836725e-08,
|
3438 |
+
"loss": 1.2462,
|
3439 |
+
"step": 490
|
3440 |
+
},
|
3441 |
+
{
|
3442 |
+
"epoch": 0.9924204143506822,
|
3443 |
+
"grad_norm": 5.038405418395996,
|
3444 |
+
"learning_rate": 1.4304382380819769e-08,
|
3445 |
+
"loss": 1.1106,
|
3446 |
+
"step": 491
|
3447 |
+
},
|
3448 |
+
{
|
3449 |
+
"epoch": 0.9944416371905003,
|
3450 |
+
"grad_norm": 5.300425052642822,
|
3451 |
+
"learning_rate": 6.357671699486201e-09,
|
3452 |
+
"loss": 1.1875,
|
3453 |
+
"step": 492
|
3454 |
+
},
|
3455 |
+
{
|
3456 |
+
"epoch": 0.9964628600303184,
|
3457 |
+
"grad_norm": 5.133315086364746,
|
3458 |
+
"learning_rate": 1.5894431881657845e-09,
|
3459 |
+
"loss": 1.1627,
|
3460 |
+
"step": 493
|
3461 |
+
},
|
3462 |
+
{
|
3463 |
+
"epoch": 0.9984840828701365,
|
3464 |
+
"grad_norm": 5.80331563949585,
|
3465 |
+
"learning_rate": 0.0,
|
3466 |
+
"loss": 1.1667,
|
3467 |
+
"step": 494
|
3468 |
}
|
3469 |
],
|
3470 |
"logging_steps": 1,
|
|
|
3479 |
"should_evaluate": false,
|
3480 |
"should_log": false,
|
3481 |
"should_save": true,
|
3482 |
+
"should_training_stop": true
|
3483 |
},
|
3484 |
"attributes": {}
|
3485 |
}
|
3486 |
},
|
3487 |
+
"total_flos": 2.2725168948314112e+17,
|
3488 |
"train_batch_size": 4,
|
3489 |
"trial_name": null,
|
3490 |
"trial_params": null
|