{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1656,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9057971014492754,
      "grad_norm": 1.3455069065093994,
      "learning_rate": 0.0002,
      "loss": 0.9137,
      "step": 250
    },
    {
      "epoch": 1.8115942028985508,
      "grad_norm": 0.6821095943450928,
      "learning_rate": 0.0002,
      "loss": 0.4615,
      "step": 500
    },
    {
      "epoch": 2.717391304347826,
      "grad_norm": 1.3968886137008667,
      "learning_rate": 0.0002,
      "loss": 0.3043,
      "step": 750
    },
    {
      "epoch": 3.6231884057971016,
      "grad_norm": 0.9846513271331787,
      "learning_rate": 0.0002,
      "loss": 0.2248,
      "step": 1000
    },
    {
      "epoch": 4.528985507246377,
      "grad_norm": 0.889771580696106,
      "learning_rate": 0.0002,
      "loss": 0.1835,
      "step": 1250
    },
    {
      "epoch": 5.434782608695652,
      "grad_norm": 0.5584134459495544,
      "learning_rate": 0.0002,
      "loss": 0.1685,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1656,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1047252202307584e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
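For reference, a minimal sketch of how the log_history above can be inspected offline, assuming this file is saved as trainer_state.json inside a checkpoint directory; the path "checkpoint-1656/trainer_state.json" and the script itself are illustrative, not part of the checkpoint:

# inspect_trainer_state.py -- illustrative helper; path below is an assumption.
import json
from pathlib import Path

state_path = Path("checkpoint-1656/trainer_state.json")  # hypothetical location
state = json.loads(state_path.read_text())

print(f"trained {state['global_step']} steps over {state['epoch']} epochs")
for entry in state["log_history"]:
    # One entry per logging event (every `logging_steps` = 250 steps here).
    print(
        f"step {entry['step']:>5} | epoch {entry['epoch']:.2f} | "
        f"loss {entry['loss']:.4f} | grad_norm {entry['grad_norm']:.3f} | "
        f"lr {entry['learning_rate']:g}"
    )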