{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 87,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 3e-05,
      "loss": 2.6543,
      "step": 1
    },
    {
      "epoch": 0.03,
      "eval_accuracy": 0.05133992343294669,
      "eval_loss": 2.611328125,
      "eval_runtime": 3.8997,
      "eval_samples_per_second": 22.053,
      "eval_steps_per_second": 1.539,
      "step": 1
    },
    {
      "epoch": 0.07,
      "learning_rate": 3e-05,
      "loss": 2.6077,
      "step": 2
    },
    {
      "epoch": 0.07,
      "eval_accuracy": 0.05133992343294669,
      "eval_loss": 2.611328125,
      "eval_runtime": 3.9386,
      "eval_samples_per_second": 21.835,
      "eval_steps_per_second": 1.523,
      "step": 2
    },
    {
      "epoch": 0.1,
      "learning_rate": 2.9990221430845156e-05,
      "loss": 2.5964,
      "step": 3
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.05192560425118565,
      "eval_loss": 2.560546875,
      "eval_runtime": 3.8896,
      "eval_samples_per_second": 22.11,
      "eval_steps_per_second": 1.543,
      "step": 3
    },
    {
      "epoch": 0.14,
      "learning_rate": 2.996089847276925e-05,
      "loss": 2.7302,
      "step": 4
    },
    {
      "epoch": 0.14,
      "eval_accuracy": 0.052582709559453746,
      "eval_loss": 2.5234375,
      "eval_runtime": 4.1976,
      "eval_samples_per_second": 20.488,
      "eval_steps_per_second": 1.429,
      "step": 4
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.9912069357315394e-05,
      "loss": 2.7004,
      "step": 5
    },
    {
      "epoch": 0.17,
      "eval_accuracy": 0.05293983200959945,
      "eval_loss": 2.5078125,
      "eval_runtime": 4.2482,
      "eval_samples_per_second": 20.244,
      "eval_steps_per_second": 1.412,
      "step": 5
    },
    {
      "epoch": 0.21,
      "learning_rate": 2.9843797748334563e-05,
      "loss": 2.5681,
      "step": 6
    },
    {
      "epoch": 0.21,
      "eval_accuracy": 0.05318267527569853,
      "eval_loss": 2.494140625,
      "eval_runtime": 4.2937,
      "eval_samples_per_second": 20.029,
      "eval_steps_per_second": 1.397,
      "step": 6
    },
    {
      "epoch": 0.24,
      "learning_rate": 2.975617265898004e-05,
      "loss": 2.6404,
      "step": 7
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.05342551854179761,
      "eval_loss": 2.48828125,
      "eval_runtime": 4.3127,
      "eval_samples_per_second": 19.941,
      "eval_steps_per_second": 1.391,
      "step": 7
    },
    {
      "epoch": 0.28,
      "learning_rate": 2.96493083356513e-05,
      "loss": 2.5325,
      "step": 8
    },
    {
      "epoch": 0.28,
      "eval_accuracy": 0.053554082623850065,
      "eval_loss": 2.48046875,
      "eval_runtime": 4.2885,
      "eval_samples_per_second": 20.053,
      "eval_steps_per_second": 1.399,
      "step": 8
    },
    {
      "epoch": 0.31,
      "learning_rate": 2.952334410903845e-05,
      "loss": 2.7205,
      "step": 9
    },
    {
      "epoch": 0.31,
      "eval_accuracy": 0.053554082623850065,
      "eval_loss": 2.474609375,
      "eval_runtime": 3.8974,
      "eval_samples_per_second": 22.066,
      "eval_steps_per_second": 1.539,
      "step": 9
    },
    {
      "epoch": 0.34,
      "learning_rate": 2.937844421246162e-05,
      "loss": 2.5149,
      "step": 10
    },
    {
      "epoch": 0.34,
      "eval_accuracy": 0.05329695445974516,
      "eval_loss": 2.46484375,
      "eval_runtime": 4.2968,
      "eval_samples_per_second": 20.015,
      "eval_steps_per_second": 1.396,
      "step": 10
    },
    {
      "epoch": 0.38,
      "learning_rate": 2.9214797567742036e-05,
      "loss": 2.5017,
      "step": 11
    },
    {
      "epoch": 0.38,
      "eval_accuracy": 0.05345408833780927,
      "eval_loss": 2.451171875,
      "eval_runtime": 4.2885,
      "eval_samples_per_second": 20.054,
      "eval_steps_per_second": 1.399,
      "step": 11
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.9032617538884018e-05,
      "loss": 2.7026,
      "step": 12
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.053896920175989946,
      "eval_loss": 2.439453125,
      "eval_runtime": 4.2979,
      "eval_samples_per_second": 20.01,
      "eval_steps_per_second": 1.396,
      "step": 12
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.8832141653888998e-05,
      "loss": 2.5259,
      "step": 13
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.05425404262613565,
      "eval_loss": 2.431640625,
      "eval_runtime": 4.2986,
      "eval_samples_per_second": 20.007,
      "eval_steps_per_second": 1.396,
      "step": 13
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.861363129506436e-05,
      "loss": 2.563,
      "step": 14
    },
    {
      "epoch": 0.48,
      "eval_accuracy": 0.05455402548425804,
      "eval_loss": 2.421875,
      "eval_runtime": 4.2879,
      "eval_samples_per_second": 20.056,
      "eval_steps_per_second": 1.399,
      "step": 14
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.8377371358230733e-05,
      "loss": 2.5679,
      "step": 15
    },
    {
      "epoch": 0.52,
      "eval_accuracy": 0.055011142220444544,
      "eval_loss": 2.4140625,
      "eval_runtime": 3.9042,
      "eval_samples_per_second": 22.027,
      "eval_steps_per_second": 1.537,
      "step": 15
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.8123669881272247e-05,
      "loss": 2.3701,
      "step": 16
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.05512542140449117,
      "eval_loss": 2.408203125,
      "eval_runtime": 3.9081,
      "eval_samples_per_second": 22.006,
      "eval_steps_per_second": 1.535,
      "step": 16
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.8123669881272247e-05,
      "loss": 2.4739,
      "step": 17
    },
    {
      "epoch": 0.59,
      "eval_accuracy": 0.05512542140449117,
      "eval_loss": 2.408203125,
      "eval_runtime": 4.3256,
      "eval_samples_per_second": 19.882,
      "eval_steps_per_second": 1.387,
      "step": 17
    },
    {
      "epoch": 0.62,
      "learning_rate": 2.7852857642513838e-05,
      "loss": 2.481,
      "step": 18
    },
    {
      "epoch": 0.62,
      "eval_accuracy": 0.054839723444374606,
      "eval_loss": 2.40234375,
      "eval_runtime": 3.8988,
      "eval_samples_per_second": 22.058,
      "eval_steps_per_second": 1.539,
      "step": 18
    },
    {
      "epoch": 0.66,
      "learning_rate": 2.7565287729449473e-05,
      "loss": 2.5795,
      "step": 19
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.054896863036397923,
      "eval_loss": 2.39453125,
      "eval_runtime": 4.2984,
      "eval_samples_per_second": 20.007,
      "eval_steps_per_second": 1.396,
      "step": 19
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.7261335078383377e-05,
      "loss": 2.4902,
      "step": 20
    },
    {
      "epoch": 0.69,
      "eval_accuracy": 0.05491114793440375,
      "eval_loss": 2.38671875,
      "eval_runtime": 4.3044,
      "eval_samples_per_second": 19.98,
      "eval_steps_per_second": 1.394,
      "step": 20
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.6941395985584656e-05,
      "loss": 2.4509,
      "step": 21
    },
    {
      "epoch": 0.72,
      "eval_accuracy": 0.055139706302497,
      "eval_loss": 2.380859375,
      "eval_runtime": 4.2949,
      "eval_samples_per_second": 20.024,
      "eval_steps_per_second": 1.397,
      "step": 21
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.6605887590592547e-05,
      "loss": 2.6052,
      "step": 22
    },
    {
      "epoch": 0.76,
      "eval_accuracy": 0.055325409976572766,
      "eval_loss": 2.373046875,
      "eval_runtime": 4.2966,
      "eval_samples_per_second": 20.016,
      "eval_steps_per_second": 1.396,
      "step": 22
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.6255247332346036e-05,
      "loss": 2.3323,
      "step": 23
    },
    {
      "epoch": 0.79,
      "eval_accuracy": 0.05552539854865436,
      "eval_loss": 2.36328125,
      "eval_runtime": 3.5753,
      "eval_samples_per_second": 24.054,
      "eval_steps_per_second": 1.678,
      "step": 23
    },
    {
      "epoch": 0.83,
      "learning_rate": 2.5889932378846963e-05,
      "loss": 2.5994,
      "step": 24
    },
    {
      "epoch": 0.83,
      "eval_accuracy": 0.05563967773270099,
      "eval_loss": 2.35546875,
      "eval_runtime": 4.2029,
      "eval_samples_per_second": 20.462,
      "eval_steps_per_second": 1.428,
      "step": 24
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.5510419031100137e-05,
      "loss": 2.3347,
      "step": 25
    },
    {
      "epoch": 0.86,
      "eval_accuracy": 0.05558253814067768,
      "eval_loss": 2.34765625,
      "eval_runtime": 4.4126,
      "eval_samples_per_second": 19.49,
      "eval_steps_per_second": 1.36,
      "step": 25
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.5117202102107707e-05,
      "loss": 2.421,
      "step": 26
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.05586823610079424,
      "eval_loss": 2.33984375,
      "eval_runtime": 4.3,
      "eval_samples_per_second": 20.0,
      "eval_steps_per_second": 1.395,
      "step": 26
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.4710794271727415e-05,
      "loss": 2.5337,
      "step": 27
    },
    {
      "epoch": 0.93,
      "eval_accuracy": 0.05603965487686418,
      "eval_loss": 2.3359375,
      "eval_runtime": 3.2828,
      "eval_samples_per_second": 26.197,
      "eval_steps_per_second": 1.828,
      "step": 27
    },
    {
      "epoch": 0.97,
      "learning_rate": 2.4291725418235848e-05,
      "loss": 2.4102,
      "step": 28
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.0562539283469516,
      "eval_loss": 2.33203125,
      "eval_runtime": 4.3038,
      "eval_samples_per_second": 19.982,
      "eval_steps_per_second": 1.394,
      "step": 28
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3860541927468265e-05,
      "loss": 2.4309,
      "step": 29
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.05643963202102737,
      "eval_loss": 2.326171875,
      "eval_runtime": 4.2875,
      "eval_samples_per_second": 20.058,
      "eval_steps_per_second": 1.399,
      "step": 29
    },
    {
      "epoch": 1.03,
      "learning_rate": 2.341780598043574e-05,
      "loss": 1.9305,
      "step": 30
    },
    {
      "epoch": 1.03,
      "eval_accuracy": 0.0563539226329924,
      "eval_loss": 2.322265625,
      "eval_runtime": 4.2962,
      "eval_samples_per_second": 20.017,
      "eval_steps_per_second": 1.397,
      "step": 30
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.2964094820348302e-05,
      "loss": 1.8601,
      "step": 31
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.056696760185132276,
      "eval_loss": 2.3203125,
      "eval_runtime": 4.32,
      "eval_samples_per_second": 19.907,
      "eval_steps_per_second": 1.389,
      "step": 31
    },
    {
      "epoch": 1.1,
      "learning_rate": 2.25e-05,
      "loss": 1.8682,
      "step": 32
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.056425347123021545,
      "eval_loss": 2.328125,
      "eval_runtime": 4.3035,
      "eval_samples_per_second": 19.984,
      "eval_steps_per_second": 1.394,
      "step": 32
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.2026126610496852e-05,
      "loss": 1.8657,
      "step": 33
    },
    {
      "epoch": 1.14,
      "eval_accuracy": 0.056382492429004054,
      "eval_loss": 2.353515625,
      "eval_runtime": 4.2925,
      "eval_samples_per_second": 20.035,
      "eval_steps_per_second": 1.398,
      "step": 33
    },
    {
      "epoch": 1.17,
      "learning_rate": 2.154309249233351e-05,
      "loss": 2.063,
      "step": 34
    },
    {
      "epoch": 1.17,
      "eval_accuracy": 0.056696760185132276,
      "eval_loss": 2.33984375,
      "eval_runtime": 3.2879,
      "eval_samples_per_second": 26.157,
      "eval_steps_per_second": 1.825,
      "step": 34
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.105152742984713e-05,
      "loss": 1.6443,
      "step": 35
    },
    {
      "epoch": 1.21,
      "eval_accuracy": 0.05683960916519056,
      "eval_loss": 2.32421875,
      "eval_runtime": 4.3025,
      "eval_samples_per_second": 19.988,
      "eval_steps_per_second": 1.395,
      "step": 35
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.055207233009872e-05,
      "loss": 1.7592,
      "step": 36
    },
    {
      "epoch": 1.24,
      "eval_accuracy": 0.056882463859208046,
      "eval_loss": 2.31640625,
      "eval_runtime": 4.2776,
      "eval_samples_per_second": 20.105,
      "eval_steps_per_second": 1.403,
      "step": 36
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.0045378387252624e-05,
      "loss": 1.8981,
      "step": 37
    },
    {
      "epoch": 1.28,
      "eval_accuracy": 0.05686817896120222,
      "eval_loss": 2.310546875,
      "eval_runtime": 3.5855,
      "eval_samples_per_second": 23.986,
      "eval_steps_per_second": 1.673,
      "step": 37
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.953210623354359e-05,
      "loss": 1.9379,
      "step": 38
    },
    {
      "epoch": 1.31,
      "eval_accuracy": 0.05728244100337124,
      "eval_loss": 2.3046875,
      "eval_runtime": 4.2888,
      "eval_samples_per_second": 20.052,
      "eval_steps_per_second": 1.399,
      "step": 38
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.9012925077938318e-05,
      "loss": 1.6008,
      "step": 39
    },
    {
      "epoch": 1.34,
      "eval_accuracy": 0.057368150391406206,
      "eval_loss": 2.302734375,
      "eval_runtime": 4.1858,
      "eval_samples_per_second": 20.546,
      "eval_steps_per_second": 1.433,
      "step": 39
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.848851183361466e-05,
      "loss": 1.595,
      "step": 40
    },
    {
      "epoch": 1.38,
      "eval_accuracy": 0.057453859779441174,
      "eval_loss": 2.302734375,
      "eval_runtime": 4.3045,
      "eval_samples_per_second": 19.979,
      "eval_steps_per_second": 1.394,
      "step": 40
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.7959550235396002e-05,
      "loss": 1.7096,
      "step": 41
    },
    {
      "epoch": 1.41,
      "eval_accuracy": 0.05752528426947032,
      "eval_loss": 2.302734375,
      "eval_runtime": 4.3092,
      "eval_samples_per_second": 19.957,
      "eval_steps_per_second": 1.392,
      "step": 41
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.7426729948291474e-05,
      "loss": 1.7245,
      "step": 42
    },
    {
      "epoch": 1.45,
      "eval_accuracy": 0.0575681389634878,
      "eval_loss": 2.302734375,
      "eval_runtime": 3.5821,
      "eval_samples_per_second": 24.008,
      "eval_steps_per_second": 1.675,
      "step": 42
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.689074566830434e-05,
      "loss": 1.795,
      "step": 43
    },
    {
      "epoch": 1.48,
      "eval_accuracy": 0.05768241814753443,
      "eval_loss": 2.30078125,
      "eval_runtime": 4.2953,
      "eval_samples_per_second": 20.022,
      "eval_steps_per_second": 1.397,
      "step": 43
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.635229621668098e-05,
      "loss": 1.7241,
      "step": 44
    },
    {
      "epoch": 1.52,
      "eval_accuracy": 0.057610993657505286,
      "eval_loss": 2.30078125,
      "eval_runtime": 4.3019,
      "eval_samples_per_second": 19.991,
      "eval_steps_per_second": 1.395,
      "step": 44
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.5812083628781265e-05,
      "loss": 1.6356,
      "step": 45
    },
    {
      "epoch": 1.55,
      "eval_accuracy": 0.057639563453516944,
      "eval_loss": 2.298828125,
      "eval_runtime": 4.2961,
      "eval_samples_per_second": 20.018,
      "eval_steps_per_second": 1.397,
      "step": 45
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.5270812238758407e-05,
      "loss": 1.77,
      "step": 46
    },
    {
      "epoch": 1.59,
      "eval_accuracy": 0.057553854065481976,
      "eval_loss": 2.296875,
      "eval_runtime": 4.3142,
      "eval_samples_per_second": 19.934,
      "eval_steps_per_second": 1.391,
      "step": 46
    },
    {
      "epoch": 1.62,
      "learning_rate": 1.4729187761241592e-05,
      "loss": 1.6675,
      "step": 47
    },
    {
      "epoch": 1.62,
      "eval_accuracy": 0.057668133249528596,
      "eval_loss": 2.29296875,
      "eval_runtime": 4.2943,
      "eval_samples_per_second": 20.026,
      "eval_steps_per_second": 1.397,
      "step": 47
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.4187916371218739e-05,
      "loss": 1.6929,
      "step": 48
    },
    {
      "epoch": 1.66,
      "eval_accuracy": 0.05771098794354608,
      "eval_loss": 2.291015625,
      "eval_runtime": 3.8871,
      "eval_samples_per_second": 22.124,
      "eval_steps_per_second": 1.544,
      "step": 48
    },
    {
      "epoch": 1.69,
      "learning_rate": 1.3647703783319022e-05,
      "loss": 1.6635,
      "step": 49
    },
    {
      "epoch": 1.69,
      "eval_accuracy": 0.05762527855551111,
      "eval_loss": 2.291015625,
      "eval_runtime": 4.3038,
      "eval_samples_per_second": 19.982,
      "eval_steps_per_second": 1.394,
      "step": 49
    },
    {
      "epoch": 1.72,
      "learning_rate": 1.310925433169566e-05,
      "loss": 1.6093,
      "step": 50
    },
    {
      "epoch": 1.72,
      "eval_accuracy": 0.05781098222958688,
      "eval_loss": 2.291015625,
      "eval_runtime": 3.8811,
      "eval_samples_per_second": 22.159,
      "eval_steps_per_second": 1.546,
      "step": 50
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.2573270051708529e-05,
      "loss": 1.7362,
      "step": 51
    },
    {
      "epoch": 1.76,
      "eval_accuracy": 0.05796811610765099,
      "eval_loss": 2.2890625,
      "eval_runtime": 4.3003,
      "eval_samples_per_second": 19.998,
      "eval_steps_per_second": 1.395,
      "step": 51
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.2040449764604002e-05,
      "loss": 1.7015,
      "step": 52
    },
    {
      "epoch": 1.79,
      "eval_accuracy": 0.05805382549568596,
      "eval_loss": 2.28515625,
      "eval_runtime": 4.2877,
      "eval_samples_per_second": 20.057,
      "eval_steps_per_second": 1.399,
      "step": 52
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.1511488166385349e-05,
      "loss": 1.9515,
      "step": 53
    },
    {
      "epoch": 1.83,
      "eval_accuracy": 0.05816810467973259,
      "eval_loss": 2.28125,
      "eval_runtime": 4.3062,
      "eval_samples_per_second": 19.971,
      "eval_steps_per_second": 1.393,
      "step": 53
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.098707492206169e-05,
      "loss": 1.6494,
      "step": 54
    },
    {
      "epoch": 1.86,
      "eval_accuracy": 0.05801097080166848,
      "eval_loss": 2.27734375,
      "eval_runtime": 3.8918,
      "eval_samples_per_second": 22.098,
      "eval_steps_per_second": 1.542,
      "step": 54
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.0467893766456408e-05,
      "loss": 1.7522,
      "step": 55
    },
    {
      "epoch": 1.9,
      "eval_accuracy": 0.05798240100565682,
      "eval_loss": 2.2734375,
      "eval_runtime": 4.1919,
      "eval_samples_per_second": 20.516,
      "eval_steps_per_second": 1.431,
      "step": 55
    },
    {
      "epoch": 1.93,
      "learning_rate": 9.954621612747371e-06,
      "loss": 1.7369,
      "step": 56
    },
    {
      "epoch": 1.93,
      "eval_accuracy": 0.05806811039369179,
      "eval_loss": 2.267578125,
      "eval_runtime": 4.3092,
      "eval_samples_per_second": 19.957,
      "eval_steps_per_second": 1.392,
      "step": 56
    },
    {
      "epoch": 1.97,
      "learning_rate": 9.447927669901284e-06,
      "loss": 1.6528,
      "step": 57
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.058125249985715104,
      "eval_loss": 2.263671875,
      "eval_runtime": 3.8882,
      "eval_samples_per_second": 22.118,
      "eval_steps_per_second": 1.543,
      "step": 57
    },
    {
      "epoch": 2.0,
      "learning_rate": 8.948472570152874e-06,
      "loss": 1.51,
      "step": 58
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.05826809896577338,
      "eval_loss": 2.26171875,
      "eval_runtime": 4.2958,
      "eval_samples_per_second": 20.019,
      "eval_steps_per_second": 1.397,
      "step": 58
    },
    {
      "epoch": 2.03,
      "learning_rate": 8.456907507666488e-06,
      "loss": 1.4579,
      "step": 59
    },
    {
      "epoch": 2.03,
      "eval_accuracy": 0.05845380263984915,
      "eval_loss": 2.263671875,
      "eval_runtime": 4.2959,
      "eval_samples_per_second": 20.019,
      "eval_steps_per_second": 1.397,
      "step": 59
    },
    {
      "epoch": 2.07,
      "learning_rate": 7.97387338950315e-06,
      "loss": 1.2645,
      "step": 60
    },
    {
      "epoch": 2.07,
      "eval_accuracy": 0.05851094223187246,
      "eval_loss": 2.26953125,
      "eval_runtime": 4.2999,
      "eval_samples_per_second": 20.0,
      "eval_steps_per_second": 1.395,
      "step": 60
    },
    {
      "epoch": 2.1,
      "learning_rate": 7.500000000000004e-06,
      "loss": 1.2424,
      "step": 61
    },
    {
      "epoch": 2.1,
      "eval_accuracy": 0.05839666304782584,
      "eval_loss": 2.27734375,
      "eval_runtime": 4.1932,
      "eval_samples_per_second": 20.51,
      "eval_steps_per_second": 1.431,
      "step": 61
    },
    {
      "epoch": 2.14,
      "learning_rate": 7.035905179651701e-06,
      "loss": 1.2117,
      "step": 62
    },
    {
      "epoch": 2.14,
      "eval_accuracy": 0.058425232843837493,
      "eval_loss": 2.2890625,
      "eval_runtime": 4.3132,
      "eval_samples_per_second": 19.939,
      "eval_steps_per_second": 1.391,
      "step": 62
    },
    {
      "epoch": 2.17,
      "learning_rate": 6.582194019564266e-06,
      "loss": 1.4059,
      "step": 63
    },
    {
      "epoch": 2.17,
      "eval_accuracy": 0.058039540597680135,
      "eval_loss": 2.30078125,
      "eval_runtime": 3.5756,
      "eval_samples_per_second": 24.052,
      "eval_steps_per_second": 1.678,
      "step": 63
    },
    {
      "epoch": 2.21,
      "learning_rate": 6.1394580725317366e-06,
      "loss": 1.328,
      "step": 64
    },
    {
      "epoch": 2.21,
      "eval_accuracy": 0.05811096508770927,
      "eval_loss": 2.314453125,
      "eval_runtime": 4.2869,
      "eval_samples_per_second": 20.061,
      "eval_steps_per_second": 1.4,
      "step": 64
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.708274581764155e-06,
      "loss": 1.3436,
      "step": 65
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.05795383120964517,
      "eval_loss": 2.328125,
      "eval_runtime": 3.8909,
      "eval_samples_per_second": 22.103,
      "eval_steps_per_second": 1.542,
      "step": 65
    },
    {
      "epoch": 2.28,
      "learning_rate": 5.289205728272587e-06,
      "loss": 1.389,
      "step": 66
    },
    {
      "epoch": 2.28,
      "eval_accuracy": 0.058039540597680135,
      "eval_loss": 2.337890625,
      "eval_runtime": 4.3156,
      "eval_samples_per_second": 19.928,
      "eval_steps_per_second": 1.39,
      "step": 66
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.882797897892293e-06,
      "loss": 1.2127,
      "step": 67
    },
    {
      "epoch": 2.31,
      "eval_accuracy": 0.0580252556996743,
      "eval_loss": 2.33984375,
      "eval_runtime": 4.3,
      "eval_samples_per_second": 20.0,
      "eval_steps_per_second": 1.395,
      "step": 67
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.4895809688998655e-06,
      "loss": 1.3645,
      "step": 68
    },
    {
      "epoch": 2.34,
      "eval_accuracy": 0.058096680189703445,
      "eval_loss": 2.341796875,
      "eval_runtime": 4.216,
      "eval_samples_per_second": 20.398,
      "eval_steps_per_second": 1.423,
      "step": 68
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.110067621153041e-06,
      "loss": 1.3389,
      "step": 69
    },
    {
      "epoch": 2.38,
      "eval_accuracy": 0.05805382549568596,
      "eval_loss": 2.337890625,
      "eval_runtime": 3.9033,
      "eval_samples_per_second": 22.032,
      "eval_steps_per_second": 1.537,
      "step": 69
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.744752667653965e-06,
      "loss": 1.2549,
      "step": 70
    },
    {
      "epoch": 2.41,
      "eval_accuracy": 0.05808239529169762,
      "eval_loss": 2.33203125,
      "eval_runtime": 3.5918,
      "eval_samples_per_second": 23.943,
      "eval_steps_per_second": 1.67,
      "step": 70
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.394112409407455e-06,
      "loss": 1.2193,
      "step": 71
    },
    {
      "epoch": 2.45,
      "eval_accuracy": 0.05816810467973259,
      "eval_loss": 2.328125,
      "eval_runtime": 4.2985,
      "eval_samples_per_second": 20.007,
      "eval_steps_per_second": 1.396,
      "step": 71
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.0586040144153436e-06,
      "loss": 1.3617,
      "step": 72
    },
    {
      "epoch": 2.48,
      "eval_accuracy": 0.0583252385577967,
      "eval_loss": 2.322265625,
      "eval_runtime": 3.2953,
      "eval_samples_per_second": 26.097,
      "eval_steps_per_second": 1.821,
      "step": 72
    },
    {
      "epoch": 2.52,
      "learning_rate": 2.7386649216166233e-06,
      "loss": 1.2336,
      "step": 73
    },
    {
      "epoch": 2.52,
      "eval_accuracy": 0.058253814067767556,
      "eval_loss": 2.318359375,
      "eval_runtime": 4.1807,
      "eval_samples_per_second": 20.571,
      "eval_steps_per_second": 1.435,
      "step": 73
    },
    {
      "epoch": 2.55,
      "learning_rate": 2.4347122705505303e-06,
      "loss": 1.179,
      "step": 74
    },
    {
      "epoch": 2.55,
      "eval_accuracy": 0.05829666876178504,
      "eval_loss": 2.314453125,
      "eval_runtime": 3.5945,
      "eval_samples_per_second": 23.926,
      "eval_steps_per_second": 1.669,
      "step": 74
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.1471423574861643e-06,
      "loss": 1.2468,
      "step": 75
    },
    {
      "epoch": 2.59,
      "eval_accuracy": 0.058282383863779215,
      "eval_loss": 2.3125,
      "eval_runtime": 3.5979,
      "eval_samples_per_second": 23.903,
      "eval_steps_per_second": 1.668,
      "step": 75
    },
    {
      "epoch": 2.62,
      "learning_rate": 1.8763301187277554e-06,
      "loss": 1.3325,
      "step": 76
    },
    {
      "epoch": 2.62,
      "eval_accuracy": 0.05829666876178504,
      "eval_loss": 2.30859375,
      "eval_runtime": 3.9093,
      "eval_samples_per_second": 21.999,
      "eval_steps_per_second": 1.535,
      "step": 76
    },
    {
      "epoch": 2.66,
      "learning_rate": 1.6226286417692666e-06,
      "loss": 1.1471,
      "step": 77
    },
    {
      "epoch": 2.66,
      "eval_accuracy": 0.058339523455802525,
      "eval_loss": 2.306640625,
      "eval_runtime": 4.2929,
      "eval_samples_per_second": 20.033,
      "eval_steps_per_second": 1.398,
      "step": 77
    },
    {
      "epoch": 2.69,
      "learning_rate": 1.3863687049356465e-06,
      "loss": 1.3123,
      "step": 78
    },
    {
      "epoch": 2.69,
      "eval_accuracy": 0.0583252385577967,
      "eval_loss": 2.306640625,
      "eval_runtime": 4.3098,
      "eval_samples_per_second": 19.955,
      "eval_steps_per_second": 1.392,
      "step": 78
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.1678583461110026e-06,
      "loss": 1.3285,
      "step": 79
    },
    {
      "epoch": 2.72,
      "eval_accuracy": 0.05845380263984915,
      "eval_loss": 2.3046875,
      "eval_runtime": 4.3011,
      "eval_samples_per_second": 19.995,
      "eval_steps_per_second": 1.395,
      "step": 79
    },
    {
      "epoch": 2.76,
      "learning_rate": 9.67382461115986e-07,
      "loss": 1.3232,
      "step": 80
    },
    {
      "epoch": 2.76,
      "eval_accuracy": 0.05836809325181418,
      "eval_loss": 2.302734375,
      "eval_runtime": 4.2978,
      "eval_samples_per_second": 20.01,
      "eval_steps_per_second": 1.396,
      "step": 80
    },
    {
      "epoch": 2.79,
      "learning_rate": 7.852024322579649e-07,
      "loss": 1.1228,
      "step": 81
    },
    {
      "epoch": 2.79,
      "eval_accuracy": 0.05835380835380835,
      "eval_loss": 2.302734375,
      "eval_runtime": 3.6063,
      "eval_samples_per_second": 23.847,
      "eval_steps_per_second": 1.664,
      "step": 81
    },
    {
      "epoch": 2.83,
      "learning_rate": 6.215557875383804e-07,
      "loss": 1.3524,
      "step": 82
    },
    {
      "epoch": 2.83,
      "eval_accuracy": 0.05839666304782584,
      "eval_loss": 2.302734375,
      "eval_runtime": 4.1961,
      "eval_samples_per_second": 20.495,
      "eval_steps_per_second": 1.43,
      "step": 82
    },
    {
      "epoch": 2.86,
      "learning_rate": 4.766558909615504e-07,
      "loss": 1.2042,
      "step": 83
    },
    {
      "epoch": 2.86,
      "eval_accuracy": 0.058339523455802525,
      "eval_loss": 2.302734375,
      "eval_runtime": 4.3056,
      "eval_samples_per_second": 19.974,
      "eval_steps_per_second": 1.394,
      "step": 83
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.5069166434870014e-07,
      "loss": 1.3588,
      "step": 84
    },
    {
      "epoch": 2.9,
      "eval_accuracy": 0.058339523455802525,
      "eval_loss": 2.30078125,
      "eval_runtime": 4.1957,
      "eval_samples_per_second": 20.497,
      "eval_steps_per_second": 1.43,
      "step": 84
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.438273410199598e-07,
      "loss": 1.2982,
      "step": 85
    },
    {
      "epoch": 2.93,
      "eval_accuracy": 0.058425232843837493,
      "eval_loss": 2.30078125,
      "eval_runtime": 3.898,
      "eval_samples_per_second": 22.062,
      "eval_steps_per_second": 1.539,
      "step": 85
    },
    {
      "epoch": 2.97,
      "learning_rate": 1.5620225166544155e-07,
      "loss": 1.4373,
      "step": 86
    },
    {
      "epoch": 2.97,
      "eval_accuracy": 0.05845380263984915,
      "eval_loss": 2.30078125,
      "eval_runtime": 4.3019,
      "eval_samples_per_second": 19.991,
      "eval_steps_per_second": 1.395,
      "step": 86
    },
    {
      "epoch": 3.0,
      "learning_rate": 8.793064268460605e-08,
      "loss": 1.3562,
      "step": 87
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.05841094794583167,
      "eval_loss": 2.30078125,
      "eval_runtime": 4.2946,
      "eval_samples_per_second": 20.025,
      "eval_steps_per_second": 1.397,
      "step": 87
    },
    {
      "epoch": 3.0,
      "step": 87,
      "total_flos": 4783591391232.0,
      "train_loss": 1.8617539679867097,
      "train_runtime": 1017.4729,
      "train_samples_per_second": 1.353,
      "train_steps_per_second": 0.086
    }
  ],
  "max_steps": 87,
  "num_train_epochs": 3,
  "total_flos": 4783591391232.0,
  "trial_name": null,
  "trial_params": null
}