File size: 1,680 Bytes
af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 89e71af af2c5aa 780a12c 89e71af af2c5aa 780a12c af2c5aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 3300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.45,
"grad_norm": 4.830384254455566,
"learning_rate": 4.242424242424243e-05,
"loss": 0.5576,
"step": 500
},
{
"epoch": 0.91,
"grad_norm": 2.6578476428985596,
"learning_rate": 3.484848484848485e-05,
"loss": 0.4035,
"step": 1000
},
{
"epoch": 1.36,
"grad_norm": 6.386310577392578,
"learning_rate": 2.7272727272727273e-05,
"loss": 0.2828,
"step": 1500
},
{
"epoch": 1.82,
"grad_norm": 1.7881163358688354,
"learning_rate": 1.9696969696969697e-05,
"loss": 0.2468,
"step": 2000
},
{
"epoch": 2.27,
"grad_norm": 2.0737249851226807,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.1608,
"step": 2500
},
{
"epoch": 2.73,
"grad_norm": 0.20467181503772736,
"learning_rate": 4.5454545454545455e-06,
"loss": 0.1321,
"step": 3000
},
{
"epoch": 3.0,
"step": 3300,
"total_flos": 2871828253461180.0,
"train_loss": 0.2811542418508819,
"train_runtime": 449.7464,
"train_samples_per_second": 117.386,
"train_steps_per_second": 7.337
}
],
"logging_steps": 500,
"max_steps": 3300,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 2871828253461180.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|