File size: 3,049 Bytes
4557df8 2ef6291 4557df8 2ef6291 4557df8 d88a8f4 4557df8 d88a8f4 4557df8 d88a8f4 4557df8 d88a8f4 4557df8 d88a8f4 4557df8 d88a8f4 4557df8 5e375bb d88a8f4 5e375bb d88a8f4 5e375bb d88a8f4 5e375bb d88a8f4 5e375bb d88a8f4 5e375bb d88a8f4 5e375bb d88a8f4 5e375bb 2ef6291 4557df8 d88a8f4 4557df8 2ef6291 4557df8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.00034053501835105374,
"eval_steps": 3,
"global_step": 9,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.7837224261228196e-05,
"grad_norm": 14.805928230285645,
"learning_rate": 2e-05,
"loss": 4.2775,
"step": 1
},
{
"epoch": 3.7837224261228196e-05,
"eval_loss": 1.0869630575180054,
"eval_runtime": 2378.1464,
"eval_samples_per_second": 2.34,
"eval_steps_per_second": 2.34,
"step": 1
},
{
"epoch": 7.567444852245639e-05,
"grad_norm": 14.053690910339355,
"learning_rate": 4e-05,
"loss": 4.198,
"step": 2
},
{
"epoch": 0.00011351167278368459,
"grad_norm": 9.68853759765625,
"learning_rate": 6e-05,
"loss": 4.309,
"step": 3
},
{
"epoch": 0.00011351167278368459,
"eval_loss": 0.9365310072898865,
"eval_runtime": 2383.4538,
"eval_samples_per_second": 2.334,
"eval_steps_per_second": 2.334,
"step": 3
},
{
"epoch": 0.00015134889704491279,
"grad_norm": 7.027285099029541,
"learning_rate": 8e-05,
"loss": 3.8788,
"step": 4
},
{
"epoch": 0.00018918612130614098,
"grad_norm": 4.5182366371154785,
"learning_rate": 0.0001,
"loss": 3.2846,
"step": 5
},
{
"epoch": 0.00022702334556736918,
"grad_norm": 3.581972122192383,
"learning_rate": 0.00012,
"loss": 3.2404,
"step": 6
},
{
"epoch": 0.00022702334556736918,
"eval_loss": 0.7990255951881409,
"eval_runtime": 2317.8914,
"eval_samples_per_second": 2.4,
"eval_steps_per_second": 2.4,
"step": 6
},
{
"epoch": 0.00026486056982859735,
"grad_norm": 4.4711103439331055,
"learning_rate": 0.00014,
"loss": 3.3662,
"step": 7
},
{
"epoch": 0.00030269779408982557,
"grad_norm": 3.5810935497283936,
"learning_rate": 0.00016,
"loss": 2.9538,
"step": 8
},
{
"epoch": 0.00034053501835105374,
"grad_norm": 3.5936524868011475,
"learning_rate": 0.00018,
"loss": 2.434,
"step": 9
},
{
"epoch": 0.00034053501835105374,
"eval_loss": 0.6861280202865601,
"eval_runtime": 2598.99,
"eval_samples_per_second": 2.141,
"eval_steps_per_second": 2.141,
"step": 9
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6328165488132096.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|