|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.412280701754386, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.973626373626374e-05, |
|
"loss": 1.622, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.24010217113665389, |
|
"eval_loss": 1.6581733226776123, |
|
"eval_runtime": 381.1384, |
|
"eval_samples_per_second": 4.109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.9406593406593407e-05, |
|
"loss": 1.3721, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.2835249042145594, |
|
"eval_loss": 1.9170576333999634, |
|
"eval_runtime": 378.5115, |
|
"eval_samples_per_second": 4.137, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.907692307692308e-05, |
|
"loss": 1.1269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.36845466155810985, |
|
"eval_loss": 1.7032861709594727, |
|
"eval_runtime": 385.3173, |
|
"eval_samples_per_second": 4.064, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8747252747252748e-05, |
|
"loss": 0.9152, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.388250319284802, |
|
"eval_loss": 1.7423878908157349, |
|
"eval_runtime": 388.0594, |
|
"eval_samples_per_second": 4.035, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.8417582417582416e-05, |
|
"loss": 0.732, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.4616858237547893, |
|
"eval_loss": 1.5253428220748901, |
|
"eval_runtime": 386.9842, |
|
"eval_samples_per_second": 4.047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.8087912087912088e-05, |
|
"loss": 0.684, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.4367816091954023, |
|
"eval_loss": 1.6441736221313477, |
|
"eval_runtime": 388.1257, |
|
"eval_samples_per_second": 4.035, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.775824175824176e-05, |
|
"loss": 0.5538, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5108556832694764, |
|
"eval_loss": 1.450364351272583, |
|
"eval_runtime": 389.4611, |
|
"eval_samples_per_second": 4.021, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.7428571428571428e-05, |
|
"loss": 0.5425, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.49936143039591313, |
|
"eval_loss": 1.4108269214630127, |
|
"eval_runtime": 389.7612, |
|
"eval_samples_per_second": 4.018, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.70989010989011e-05, |
|
"loss": 0.514, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5057471264367817, |
|
"eval_loss": 1.4206087589263916, |
|
"eval_runtime": 396.4756, |
|
"eval_samples_per_second": 3.95, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.6769230769230772e-05, |
|
"loss": 0.3068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.5159642401021711, |
|
"eval_loss": 1.6294729709625244, |
|
"eval_runtime": 394.6782, |
|
"eval_samples_per_second": 3.968, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.643956043956044e-05, |
|
"loss": 0.2865, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.5076628352490421, |
|
"eval_loss": 1.733625888824463, |
|
"eval_runtime": 393.6145, |
|
"eval_samples_per_second": 3.979, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.6109890109890112e-05, |
|
"loss": 0.301, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.6277139208173691, |
|
"eval_loss": 1.2089332342147827, |
|
"eval_runtime": 389.8374, |
|
"eval_samples_per_second": 4.017, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.578021978021978e-05, |
|
"loss": 0.2517, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.5983397190293742, |
|
"eval_loss": 1.3427765369415283, |
|
"eval_runtime": 388.9975, |
|
"eval_samples_per_second": 4.026, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.545054945054945e-05, |
|
"loss": 0.2819, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.5600255427841635, |
|
"eval_loss": 1.5083413124084473, |
|
"eval_runtime": 390.3746, |
|
"eval_samples_per_second": 4.012, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.512087912087912e-05, |
|
"loss": 0.2706, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.6047254150702427, |
|
"eval_loss": 1.5413047075271606, |
|
"eval_runtime": 389.1952, |
|
"eval_samples_per_second": 4.024, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.479120879120879e-05, |
|
"loss": 0.1991, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.598978288633461, |
|
"eval_loss": 1.5572402477264404, |
|
"eval_runtime": 389.1243, |
|
"eval_samples_per_second": 4.024, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.446153846153846e-05, |
|
"loss": 0.2726, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.6021711366538953, |
|
"eval_loss": 1.4013671875, |
|
"eval_runtime": 392.7301, |
|
"eval_samples_per_second": 3.987, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.4131868131868133e-05, |
|
"loss": 0.2015, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.5676883780332056, |
|
"eval_loss": 2.248255729675293, |
|
"eval_runtime": 389.7999, |
|
"eval_samples_per_second": 4.017, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.38021978021978e-05, |
|
"loss": 0.173, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_accuracy": 0.644316730523627, |
|
"eval_loss": 1.5393086671829224, |
|
"eval_runtime": 390.3149, |
|
"eval_samples_per_second": 4.012, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.3472527472527474e-05, |
|
"loss": 0.1177, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.6028097062579821, |
|
"eval_loss": 1.9018601179122925, |
|
"eval_runtime": 390.5708, |
|
"eval_samples_per_second": 4.01, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3142857142857145e-05, |
|
"loss": 0.0907, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.6194125159642401, |
|
"eval_loss": 1.8655920028686523, |
|
"eval_runtime": 392.1244, |
|
"eval_samples_per_second": 3.994, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.2813186813186814e-05, |
|
"loss": 0.1878, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.6296296296296297, |
|
"eval_loss": 1.998379111289978, |
|
"eval_runtime": 390.0846, |
|
"eval_samples_per_second": 4.015, |
|
"step": 2200 |
|
} |
|
], |
|
"max_steps": 9120, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.137068831744e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|