File size: 4,833 Bytes
06909ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 11080,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18050541516245489,
"grad_norm": 31.555688858032227,
"learning_rate": 2.864620938628159e-05,
"loss": 1.3005,
"step": 500
},
{
"epoch": 0.36101083032490977,
"grad_norm": 20.2050838470459,
"learning_rate": 2.729241877256318e-05,
"loss": 0.9978,
"step": 1000
},
{
"epoch": 0.5415162454873647,
"grad_norm": 25.661306381225586,
"learning_rate": 2.5938628158844765e-05,
"loss": 0.9259,
"step": 1500
},
{
"epoch": 0.7220216606498195,
"grad_norm": 22.348859786987305,
"learning_rate": 2.4584837545126353e-05,
"loss": 0.8798,
"step": 2000
},
{
"epoch": 0.9025270758122743,
"grad_norm": 32.197166442871094,
"learning_rate": 2.3231046931407943e-05,
"loss": 0.8471,
"step": 2500
},
{
"epoch": 1.0830324909747293,
"grad_norm": 20.764020919799805,
"learning_rate": 2.1877256317689534e-05,
"loss": 0.771,
"step": 3000
},
{
"epoch": 1.263537906137184,
"grad_norm": 15.258258819580078,
"learning_rate": 2.0523465703971117e-05,
"loss": 0.6937,
"step": 3500
},
{
"epoch": 1.444043321299639,
"grad_norm": 24.816614151000977,
"learning_rate": 1.9169675090252708e-05,
"loss": 0.709,
"step": 4000
},
{
"epoch": 1.6245487364620939,
"grad_norm": 38.912071228027344,
"learning_rate": 1.7815884476534298e-05,
"loss": 0.6831,
"step": 4500
},
{
"epoch": 1.8050541516245486,
"grad_norm": 8.910807609558105,
"learning_rate": 1.6462093862815885e-05,
"loss": 0.6742,
"step": 5000
},
{
"epoch": 1.9855595667870036,
"grad_norm": 11.664189338684082,
"learning_rate": 1.5108303249097474e-05,
"loss": 0.69,
"step": 5500
},
{
"epoch": 2.1660649819494586,
"grad_norm": 10.968308448791504,
"learning_rate": 1.3754512635379063e-05,
"loss": 0.5436,
"step": 6000
},
{
"epoch": 2.3465703971119134,
"grad_norm": 11.711438179016113,
"learning_rate": 1.240072202166065e-05,
"loss": 0.5357,
"step": 6500
},
{
"epoch": 2.527075812274368,
"grad_norm": 13.477335929870605,
"learning_rate": 1.1046931407942239e-05,
"loss": 0.5359,
"step": 7000
},
{
"epoch": 2.707581227436823,
"grad_norm": 10.649256706237793,
"learning_rate": 9.693140794223826e-06,
"loss": 0.5394,
"step": 7500
},
{
"epoch": 2.888086642599278,
"grad_norm": 10.525208473205566,
"learning_rate": 8.339350180505416e-06,
"loss": 0.5254,
"step": 8000
},
{
"epoch": 3.068592057761733,
"grad_norm": 19.402320861816406,
"learning_rate": 6.985559566787004e-06,
"loss": 0.4775,
"step": 8500
},
{
"epoch": 3.2490974729241877,
"grad_norm": 41.23615646362305,
"learning_rate": 5.631768953068592e-06,
"loss": 0.4003,
"step": 9000
},
{
"epoch": 3.4296028880866425,
"grad_norm": 21.56231689453125,
"learning_rate": 4.277978339350181e-06,
"loss": 0.3952,
"step": 9500
},
{
"epoch": 3.6101083032490973,
"grad_norm": 11.254490852355957,
"learning_rate": 2.924187725631769e-06,
"loss": 0.4007,
"step": 10000
},
{
"epoch": 3.7906137184115525,
"grad_norm": 29.451414108276367,
"learning_rate": 1.5703971119133576e-06,
"loss": 0.3962,
"step": 10500
},
{
"epoch": 3.9711191335740073,
"grad_norm": 16.022735595703125,
"learning_rate": 2.1660649819494586e-07,
"loss": 0.3853,
"step": 11000
},
{
"epoch": 4.0,
"step": 11080,
"total_flos": 1.0399493167607808e+16,
"train_loss": 0.6484355885199261,
"train_runtime": 8433.1347,
"train_samples_per_second": 42.043,
"train_steps_per_second": 1.314
}
],
"logging_steps": 500,
"max_steps": 11080,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0399493167607808e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|