|
{ |
|
"best_metric": 1.386866569519043, |
|
"best_model_checkpoint": "outputs_llama-2/checkpoint-240", |
|
"epoch": 0.3702275356729657, |
|
"eval_steps": 40, |
|
"global_step": 240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 3.4868, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-05, |
|
"loss": 3.7961, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9117, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-05, |
|
"loss": 3.5057, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2378, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012, |
|
"loss": 3.0761, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014, |
|
"loss": 3.3794, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016, |
|
"loss": 2.728, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018, |
|
"loss": 2.5244, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002, |
|
"loss": 2.47, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019968652037617558, |
|
"loss": 2.5237, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001993730407523511, |
|
"loss": 2.3919, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019905956112852667, |
|
"loss": 1.9547, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001987460815047022, |
|
"loss": 1.8513, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019843260188087775, |
|
"loss": 1.6401, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001981191222570533, |
|
"loss": 1.7872, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019780564263322884, |
|
"loss": 1.8782, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001974921630094044, |
|
"loss": 1.8139, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019717868338557995, |
|
"loss": 1.5255, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001968652037617555, |
|
"loss": 1.326, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019655172413793104, |
|
"loss": 1.7972, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001962382445141066, |
|
"loss": 1.4295, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019592476489028212, |
|
"loss": 1.6369, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001956112852664577, |
|
"loss": 1.7473, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019529780564263324, |
|
"loss": 1.6524, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019498432601880878, |
|
"loss": 1.5889, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019467084639498435, |
|
"loss": 1.3206, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019435736677115987, |
|
"loss": 1.9595, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019404388714733544, |
|
"loss": 1.5356, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019373040752351098, |
|
"loss": 1.932, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019341692789968652, |
|
"loss": 1.3679, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001931034482758621, |
|
"loss": 1.5176, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001927899686520376, |
|
"loss": 1.774, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019247648902821318, |
|
"loss": 1.4211, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019216300940438872, |
|
"loss": 2.0095, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019184952978056427, |
|
"loss": 1.1123, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001915360501567398, |
|
"loss": 1.5061, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019122257053291538, |
|
"loss": 1.8059, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019090909090909092, |
|
"loss": 1.361, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019059561128526647, |
|
"loss": 1.6624, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.4508017301559448, |
|
"eval_runtime": 866.2645, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000190282131661442, |
|
"loss": 1.0086, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00018996865203761755, |
|
"loss": 1.8394, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018965517241379312, |
|
"loss": 1.595, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018934169278996866, |
|
"loss": 1.4083, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001890282131661442, |
|
"loss": 1.6845, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018871473354231978, |
|
"loss": 1.2298, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001884012539184953, |
|
"loss": 1.0909, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018808777429467086, |
|
"loss": 1.1942, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001877742946708464, |
|
"loss": 1.7951, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018746081504702195, |
|
"loss": 1.5837, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018714733542319752, |
|
"loss": 1.1171, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018683385579937304, |
|
"loss": 1.5556, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001865203761755486, |
|
"loss": 1.6377, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018620689655172415, |
|
"loss": 1.7227, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001858934169278997, |
|
"loss": 1.6148, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018557993730407524, |
|
"loss": 1.1987, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001852664576802508, |
|
"loss": 0.8116, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018495297805642635, |
|
"loss": 1.627, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001846394984326019, |
|
"loss": 1.3519, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018432601880877744, |
|
"loss": 1.1224, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018401253918495298, |
|
"loss": 1.4279, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018369905956112855, |
|
"loss": 1.3011, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018338557993730406, |
|
"loss": 1.654, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018307210031347963, |
|
"loss": 0.8621, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018275862068965518, |
|
"loss": 1.3778, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018244514106583072, |
|
"loss": 1.7181, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001821316614420063, |
|
"loss": 1.603, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 1.3475, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018150470219435738, |
|
"loss": 1.7242, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018119122257053292, |
|
"loss": 1.58, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018087774294670846, |
|
"loss": 1.4371, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018056426332288403, |
|
"loss": 1.3795, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018025078369905958, |
|
"loss": 1.1421, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00017993730407523512, |
|
"loss": 1.1617, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017962382445141066, |
|
"loss": 1.4031, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001793103448275862, |
|
"loss": 2.0192, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017899686520376175, |
|
"loss": 1.4762, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017868338557993732, |
|
"loss": 1.4992, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017836990595611286, |
|
"loss": 1.5983, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001780564263322884, |
|
"loss": 1.3888, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.416973352432251, |
|
"eval_runtime": 866.3924, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017774294670846398, |
|
"loss": 1.0799, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001774294670846395, |
|
"loss": 1.3961, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017711598746081506, |
|
"loss": 1.5792, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001768025078369906, |
|
"loss": 1.6384, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017648902821316615, |
|
"loss": 1.3299, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017617554858934172, |
|
"loss": 1.7483, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017586206896551723, |
|
"loss": 1.7161, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001755485893416928, |
|
"loss": 1.3523, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017523510971786835, |
|
"loss": 1.5451, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001749216300940439, |
|
"loss": 1.4589, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017460815047021943, |
|
"loss": 1.4352, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000174294670846395, |
|
"loss": 1.5711, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017398119122257055, |
|
"loss": 1.3834, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001736677115987461, |
|
"loss": 1.3734, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017335423197492163, |
|
"loss": 1.5402, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017304075235109718, |
|
"loss": 1.5848, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017272727272727275, |
|
"loss": 1.3129, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017241379310344826, |
|
"loss": 1.3945, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017210031347962383, |
|
"loss": 1.79, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001717868338557994, |
|
"loss": 1.0874, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017147335423197492, |
|
"loss": 1.617, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001711598746081505, |
|
"loss": 1.259, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017084639498432603, |
|
"loss": 1.577, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017053291536050158, |
|
"loss": 1.3163, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017021943573667712, |
|
"loss": 1.3077, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00016990595611285266, |
|
"loss": 1.2611, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016959247648902823, |
|
"loss": 1.8003, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016927899686520377, |
|
"loss": 1.3783, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016896551724137932, |
|
"loss": 1.3896, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016865203761755486, |
|
"loss": 1.4663, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001683385579937304, |
|
"loss": 0.7607, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016802507836990597, |
|
"loss": 0.9899, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016771159874608152, |
|
"loss": 1.8002, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016739811912225706, |
|
"loss": 1.5776, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001670846394984326, |
|
"loss": 1.551, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016677115987460817, |
|
"loss": 1.4058, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001664576802507837, |
|
"loss": 1.0475, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016614420062695926, |
|
"loss": 1.7153, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001658307210031348, |
|
"loss": 1.6289, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016551724137931035, |
|
"loss": 1.2282, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.4044392108917236, |
|
"eval_runtime": 866.8231, |
|
"eval_samples_per_second": 1.159, |
|
"eval_steps_per_second": 1.159, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016520376175548592, |
|
"loss": 1.6396, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016489028213166143, |
|
"loss": 1.7545, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000164576802507837, |
|
"loss": 1.3772, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016426332288401255, |
|
"loss": 1.2632, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001639498432601881, |
|
"loss": 1.0961, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016363636363636366, |
|
"loss": 1.228, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001633228840125392, |
|
"loss": 1.8272, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016300940438871475, |
|
"loss": 1.2743, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001626959247648903, |
|
"loss": 1.3611, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016238244514106583, |
|
"loss": 1.2495, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016206896551724137, |
|
"loss": 1.2027, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016175548589341694, |
|
"loss": 1.0558, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001614420062695925, |
|
"loss": 1.1034, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00016112852664576803, |
|
"loss": 1.7864, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001608150470219436, |
|
"loss": 1.3586, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00016050156739811912, |
|
"loss": 1.639, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001601880877742947, |
|
"loss": 1.4719, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015987460815047023, |
|
"loss": 1.7374, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015956112852664577, |
|
"loss": 1.5463, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015924764890282134, |
|
"loss": 1.2065, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015893416927899686, |
|
"loss": 1.1328, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015862068965517243, |
|
"loss": 1.2139, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015830721003134797, |
|
"loss": 1.5045, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015799373040752352, |
|
"loss": 1.676, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015768025078369906, |
|
"loss": 1.3679, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001573667711598746, |
|
"loss": 1.3023, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015705329153605017, |
|
"loss": 0.6768, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015673981191222572, |
|
"loss": 1.2916, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015642633228840126, |
|
"loss": 1.343, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001561128526645768, |
|
"loss": 1.1307, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015579937304075237, |
|
"loss": 1.277, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001554858934169279, |
|
"loss": 0.948, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015517241379310346, |
|
"loss": 1.4819, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000154858934169279, |
|
"loss": 1.7333, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015454545454545454, |
|
"loss": 1.302, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015423197492163011, |
|
"loss": 1.3045, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015391849529780563, |
|
"loss": 1.2773, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001536050156739812, |
|
"loss": 0.4272, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015329153605015674, |
|
"loss": 1.3533, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001529780564263323, |
|
"loss": 1.2623, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.399330735206604, |
|
"eval_runtime": 866.2143, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015266457680250786, |
|
"loss": 1.2428, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001523510971786834, |
|
"loss": 1.1193, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015203761755485894, |
|
"loss": 1.7799, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015172413793103449, |
|
"loss": 1.8581, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015141065830721003, |
|
"loss": 1.3594, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015109717868338557, |
|
"loss": 1.1355, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015078369905956114, |
|
"loss": 1.2911, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015047021943573669, |
|
"loss": 1.1215, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015015673981191223, |
|
"loss": 1.3987, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001498432601880878, |
|
"loss": 1.4536, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00014952978056426332, |
|
"loss": 1.3232, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014921630094043889, |
|
"loss": 1.2199, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014890282131661443, |
|
"loss": 1.5375, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014858934169278997, |
|
"loss": 1.6982, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014827586206896554, |
|
"loss": 1.6182, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014796238244514106, |
|
"loss": 1.3373, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014764890282131663, |
|
"loss": 1.4321, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014733542319749217, |
|
"loss": 1.2147, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014702194357366771, |
|
"loss": 1.6977, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014670846394984328, |
|
"loss": 1.562, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001463949843260188, |
|
"loss": 1.3612, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014608150470219437, |
|
"loss": 1.4421, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001457680250783699, |
|
"loss": 1.201, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 1.5015, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000145141065830721, |
|
"loss": 1.8102, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014482758620689657, |
|
"loss": 1.1975, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001445141065830721, |
|
"loss": 1.2328, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014420062695924766, |
|
"loss": 1.8503, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001438871473354232, |
|
"loss": 0.9917, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014357366771159874, |
|
"loss": 1.1511, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001432601880877743, |
|
"loss": 1.4227, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014294670846394983, |
|
"loss": 1.5738, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001426332288401254, |
|
"loss": 1.6898, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014231974921630097, |
|
"loss": 1.2109, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014200626959247648, |
|
"loss": 1.5157, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014169278996865206, |
|
"loss": 1.4976, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001413793103448276, |
|
"loss": 1.9071, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014106583072100314, |
|
"loss": 1.436, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014075235109717868, |
|
"loss": 1.4321, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014043887147335423, |
|
"loss": 1.5108, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.3920130729675293, |
|
"eval_runtime": 866.972, |
|
"eval_samples_per_second": 1.159, |
|
"eval_steps_per_second": 1.159, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001401253918495298, |
|
"loss": 1.2756, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00013981191222570534, |
|
"loss": 1.4882, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00013949843260188088, |
|
"loss": 1.484, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00013918495297805643, |
|
"loss": 1.6371, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000138871473354232, |
|
"loss": 0.6986, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001385579937304075, |
|
"loss": 1.469, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013824451410658308, |
|
"loss": 1.4841, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013793103448275863, |
|
"loss": 1.06, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013761755485893417, |
|
"loss": 1.7007, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013730407523510974, |
|
"loss": 1.5835, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013699059561128526, |
|
"loss": 1.7115, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013667711598746083, |
|
"loss": 1.1916, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013636363636363637, |
|
"loss": 1.4264, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001360501567398119, |
|
"loss": 1.6938, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013573667711598748, |
|
"loss": 1.1683, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013542319749216303, |
|
"loss": 1.6196, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013510971786833857, |
|
"loss": 1.1304, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001347962382445141, |
|
"loss": 0.9536, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00013448275862068965, |
|
"loss": 1.4981, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001341692789968652, |
|
"loss": 1.5185, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00013385579937304077, |
|
"loss": 1.2294, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001335423197492163, |
|
"loss": 1.7871, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00013322884012539185, |
|
"loss": 1.2107, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001329153605015674, |
|
"loss": 1.4636, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013260188087774294, |
|
"loss": 1.1326, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001322884012539185, |
|
"loss": 1.4011, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013197492163009403, |
|
"loss": 1.4236, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001316614420062696, |
|
"loss": 1.35, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013134796238244517, |
|
"loss": 1.3837, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013103448275862068, |
|
"loss": 1.1546, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00013072100313479625, |
|
"loss": 1.4603, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001304075235109718, |
|
"loss": 1.4694, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00013009404388714734, |
|
"loss": 0.9677, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001297805642633229, |
|
"loss": 1.4057, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00012946708463949843, |
|
"loss": 1.5458, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000129153605015674, |
|
"loss": 1.4101, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012884012539184954, |
|
"loss": 1.4087, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012852664576802508, |
|
"loss": 1.2849, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012821316614420062, |
|
"loss": 1.73, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001278996865203762, |
|
"loss": 1.4046, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.386866569519043, |
|
"eval_runtime": 866.0039, |
|
"eval_samples_per_second": 1.161, |
|
"eval_steps_per_second": 1.161, |
|
"step": 240 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 648, |
|
"num_train_epochs": 1, |
|
"save_steps": 40, |
|
"total_flos": 1.1654508112084992e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|