{
  "best_metric": 2.237405776977539,
  "best_model_checkpoint": "output/rihanna/checkpoint-262",
  "epoch": 2.0,
  "global_step": 262,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.00013670742670262692,
      "loss": 3.4044,
      "step": 5
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00013523678052634687,
      "loss": 2.8194,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00013280918103490095,
      "loss": 2.7475,
      "step": 15
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00012945949034742042,
      "loss": 2.9331,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00012523581249268407,
      "loss": 2.8944,
      "step": 25
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00012019880259978666,
      "loss": 2.6589,
      "step": 30
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00011442079584574986,
      "loss": 2.7123,
      "step": 35
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00010798476866903087,
      "loss": 2.5847,
      "step": 40
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00010098314716666811,
      "loss": 2.391,
      "step": 45
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.351647978736063e-05,
      "loss": 2.5899,
      "step": 50
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.5691993381587e-05,
      "loss": 2.3867,
      "step": 55
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.762205334494898e-05,
      "loss": 2.4515,
      "step": 60
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.942254996821776e-05,
      "loss": 2.7458,
      "step": 65
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.121123416728538e-05,
      "loss": 2.4949,
      "step": 70
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.310602649316754e-05,
      "loss": 2.5412,
      "step": 75
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.5223323705920566e-05,
      "loss": 2.5738,
      "step": 80
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.7676327231320786e-05,
      "loss": 2.2778,
      "step": 85
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.0573417504900444e-05,
      "loss": 2.0957,
      "step": 90
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.401659754895943e-05,
      "loss": 2.3988,
      "step": 95
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.8100028133934438e-05,
      "loss": 2.3799,
      "step": 100
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.2908675560288951e-05,
      "loss": 2.5075,
      "step": 105
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.517091479772992e-06,
      "loss": 2.1984,
      "step": 110
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.988342278719811e-06,
      "loss": 2.4497,
      "step": 115
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.3731033982246404e-06,
      "loss": 2.3199,
      "step": 120
    },
    {
      "epoch": 0.95,
      "learning_rate": 7.089315974356758e-07,
      "loss": 2.3814,
      "step": 125
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.9725610793441152e-08,
      "loss": 2.1807,
      "step": 130
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.336484432220459,
      "eval_runtime": 8.0556,
      "eval_samples_per_second": 22.593,
      "eval_steps_per_second": 2.855,
      "step": 131
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.153829445781081e-07,
      "loss": 2.3816,
      "step": 135
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.5916577414195624e-06,
      "loss": 2.1418,
      "step": 140
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.8302217539407e-06,
      "loss": 2.1866,
      "step": 145
    },
    {
      "epoch": 1.15,
      "learning_rate": 6.998927551907465e-06,
      "loss": 2.3514,
      "step": 150
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.1052270183036815e-05,
      "loss": 2.3302,
      "step": 155
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.5932040657672757e-05,
      "loss": 2.0237,
      "step": 160
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.1568161872809022e-05,
      "loss": 2.1338,
      "step": 165
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.7879694970972374e-05,
      "loss": 2.1066,
      "step": 170
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.477600168191081e-05,
      "loss": 2.1939,
      "step": 175
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.215804595500327e-05,
      "loss": 2.1481,
      "step": 180
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.991981618998877e-05,
      "loss": 2.4364,
      "step": 185
    },
    {
      "epoch": 1.45,
      "learning_rate": 5.794984764173325e-05,
      "loss": 2.1677,
      "step": 190
    },
    {
      "epoch": 1.49,
      "learning_rate": 6.613282313617852e-05,
      "loss": 2.3095,
      "step": 195
    },
    {
      "epoch": 1.53,
      "learning_rate": 7.435122911001204e-05,
      "loss": 2.4404,
      "step": 200
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.248704319210595e-05,
      "loss": 2.0718,
      "step": 205
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.042342909181217e-05,
      "loss": 2.2015,
      "step": 210
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.804641445426643e-05,
      "loss": 2.1899,
      "step": 215
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.00010524652758746261,
      "loss": 2.1465,
      "step": 220
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00011192036955648884,
      "loss": 2.3478,
      "step": 225
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.00011797209906849287,
      "loss": 2.3704,
      "step": 230
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.0001233148088243337,
      "loss": 2.2886,
      "step": 235
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00012787177357149405,
      "loss": 2.1345,
      "step": 240
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.00013157755193529395,
      "loss": 2.0747,
      "step": 245
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00013437892620529645,
      "loss": 2.2118,
      "step": 250
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.000136235666580879,
      "loss": 2.1172,
      "step": 255
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.00013712110890084145,
      "loss": 2.2353,
      "step": 260
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.237405776977539,
      "eval_runtime": 8.3034,
      "eval_samples_per_second": 22.521,
      "eval_steps_per_second": 2.89,
      "step": 262
    }
  ],
  "max_steps": 262,
  "num_train_epochs": 2,
  "total_flos": 272919527424000.0,
  "trial_name": null,
  "trial_params": null
}