|
{ |
|
"best_metric": 1.3731473684310913, |
|
"best_model_checkpoint": "outputs_llama-2/checkpoint-400", |
|
"epoch": 0.6170458927882762, |
|
"eval_steps": 40, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 3.4868, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-05, |
|
"loss": 3.7961, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9088, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-05, |
|
"loss": 3.4984, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2276, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012, |
|
"loss": 3.0655, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014, |
|
"loss": 3.3618, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016, |
|
"loss": 2.7163, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018, |
|
"loss": 2.5164, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4623, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019968652037617558, |
|
"loss": 2.5072, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001993730407523511, |
|
"loss": 2.3718, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019905956112852667, |
|
"loss": 1.9166, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001987460815047022, |
|
"loss": 1.8158, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019843260188087775, |
|
"loss": 1.6101, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001981191222570533, |
|
"loss": 1.7501, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019780564263322884, |
|
"loss": 1.8495, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001974921630094044, |
|
"loss": 1.7858, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019717868338557995, |
|
"loss": 1.511, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001968652037617555, |
|
"loss": 1.3787, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019655172413793104, |
|
"loss": 1.8197, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001962382445141066, |
|
"loss": 1.4238, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019592476489028212, |
|
"loss": 1.6311, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001956112852664577, |
|
"loss": 1.7413, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019529780564263324, |
|
"loss": 1.6356, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019498432601880878, |
|
"loss": 1.5826, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019467084639498435, |
|
"loss": 1.3187, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019435736677115987, |
|
"loss": 1.9504, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019404388714733544, |
|
"loss": 1.5303, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019373040752351098, |
|
"loss": 1.9262, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019341692789968652, |
|
"loss": 1.3492, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001931034482758621, |
|
"loss": 1.5133, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001927899686520376, |
|
"loss": 1.7638, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019247648902821318, |
|
"loss": 1.4146, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019216300940438872, |
|
"loss": 2.0048, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019184952978056427, |
|
"loss": 1.1199, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001915360501567398, |
|
"loss": 1.5077, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019122257053291538, |
|
"loss": 1.8042, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019090909090909092, |
|
"loss": 1.3575, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019059561128526647, |
|
"loss": 1.6653, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.4491230249404907, |
|
"eval_runtime": 866.4575, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000190282131661442, |
|
"loss": 1.0084, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00018996865203761755, |
|
"loss": 1.841, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018965517241379312, |
|
"loss": 1.5954, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018934169278996866, |
|
"loss": 1.4138, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001890282131661442, |
|
"loss": 1.6798, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018871473354231978, |
|
"loss": 1.2323, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001884012539184953, |
|
"loss": 1.1051, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00018808777429467086, |
|
"loss": 1.2045, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001877742946708464, |
|
"loss": 1.7915, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018746081504702195, |
|
"loss": 1.5733, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018714733542319752, |
|
"loss": 1.1321, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018683385579937304, |
|
"loss": 1.5533, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001865203761755486, |
|
"loss": 1.6388, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00018620689655172415, |
|
"loss": 1.7207, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001858934169278997, |
|
"loss": 1.6191, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018557993730407524, |
|
"loss": 1.1936, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001852664576802508, |
|
"loss": 0.8021, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018495297805642635, |
|
"loss": 1.6352, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001846394984326019, |
|
"loss": 1.3609, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018432601880877744, |
|
"loss": 1.1234, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018401253918495298, |
|
"loss": 1.4164, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018369905956112855, |
|
"loss": 1.2983, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018338557993730406, |
|
"loss": 1.6532, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018307210031347963, |
|
"loss": 0.8675, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018275862068965518, |
|
"loss": 1.3623, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018244514106583072, |
|
"loss": 1.7138, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001821316614420063, |
|
"loss": 1.6071, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 1.3497, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018150470219435738, |
|
"loss": 1.7226, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018119122257053292, |
|
"loss": 1.5776, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018087774294670846, |
|
"loss": 1.4415, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018056426332288403, |
|
"loss": 1.3818, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018025078369905958, |
|
"loss": 1.14, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00017993730407523512, |
|
"loss": 1.1541, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017962382445141066, |
|
"loss": 1.4013, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001793103448275862, |
|
"loss": 2.0154, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017899686520376175, |
|
"loss": 1.4794, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017868338557993732, |
|
"loss": 1.4989, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017836990595611286, |
|
"loss": 1.6, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001780564263322884, |
|
"loss": 1.3883, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.4160501956939697, |
|
"eval_runtime": 865.4029, |
|
"eval_samples_per_second": 1.161, |
|
"eval_steps_per_second": 1.161, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00017774294670846398, |
|
"loss": 1.0814, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001774294670846395, |
|
"loss": 1.4005, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017711598746081506, |
|
"loss": 1.576, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001768025078369906, |
|
"loss": 1.6388, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017648902821316615, |
|
"loss": 1.3295, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017617554858934172, |
|
"loss": 1.7523, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00017586206896551723, |
|
"loss": 1.7137, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001755485893416928, |
|
"loss": 1.3514, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017523510971786835, |
|
"loss": 1.5445, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001749216300940439, |
|
"loss": 1.454, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017460815047021943, |
|
"loss": 1.4337, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000174294670846395, |
|
"loss": 1.572, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00017398119122257055, |
|
"loss": 1.3782, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001736677115987461, |
|
"loss": 1.3746, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017335423197492163, |
|
"loss": 1.5409, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017304075235109718, |
|
"loss": 1.5832, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017272727272727275, |
|
"loss": 1.3129, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017241379310344826, |
|
"loss": 1.3966, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017210031347962383, |
|
"loss": 1.7874, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001717868338557994, |
|
"loss": 1.0902, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017147335423197492, |
|
"loss": 1.617, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001711598746081505, |
|
"loss": 1.2597, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017084639498432603, |
|
"loss": 1.5788, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017053291536050158, |
|
"loss": 1.3218, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017021943573667712, |
|
"loss": 1.3153, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00016990595611285266, |
|
"loss": 1.2564, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016959247648902823, |
|
"loss": 1.8, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016927899686520377, |
|
"loss": 1.3895, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016896551724137932, |
|
"loss": 1.3897, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016865203761755486, |
|
"loss": 1.4656, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001683385579937304, |
|
"loss": 0.7588, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016802507836990597, |
|
"loss": 0.9988, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016771159874608152, |
|
"loss": 1.8016, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016739811912225706, |
|
"loss": 1.5793, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001670846394984326, |
|
"loss": 1.5504, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016677115987460817, |
|
"loss": 1.3969, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001664576802507837, |
|
"loss": 1.0452, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016614420062695926, |
|
"loss": 1.7136, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001658307210031348, |
|
"loss": 1.6306, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016551724137931035, |
|
"loss": 1.2302, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.4042863845825195, |
|
"eval_runtime": 866.713, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016520376175548592, |
|
"loss": 1.6392, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016489028213166143, |
|
"loss": 1.7501, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000164576802507837, |
|
"loss": 1.3811, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016426332288401255, |
|
"loss": 1.2695, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001639498432601881, |
|
"loss": 1.0973, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016363636363636366, |
|
"loss": 1.2281, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001633228840125392, |
|
"loss": 1.8312, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016300940438871475, |
|
"loss": 1.2764, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001626959247648903, |
|
"loss": 1.3624, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016238244514106583, |
|
"loss": 1.2449, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016206896551724137, |
|
"loss": 1.2079, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016175548589341694, |
|
"loss": 1.0546, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001614420062695925, |
|
"loss": 1.1052, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00016112852664576803, |
|
"loss": 1.789, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001608150470219436, |
|
"loss": 1.357, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00016050156739811912, |
|
"loss": 1.6354, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001601880877742947, |
|
"loss": 1.4717, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015987460815047023, |
|
"loss": 1.7371, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015956112852664577, |
|
"loss": 1.5482, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015924764890282134, |
|
"loss": 1.202, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015893416927899686, |
|
"loss": 1.1362, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015862068965517243, |
|
"loss": 1.2072, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015830721003134797, |
|
"loss": 1.5035, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015799373040752352, |
|
"loss": 1.6734, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015768025078369906, |
|
"loss": 1.3708, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001573667711598746, |
|
"loss": 1.3016, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015705329153605017, |
|
"loss": 0.6789, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015673981191222572, |
|
"loss": 1.2877, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015642633228840126, |
|
"loss": 1.3416, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001561128526645768, |
|
"loss": 1.1312, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00015579937304075237, |
|
"loss": 1.2825, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001554858934169279, |
|
"loss": 0.9458, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015517241379310346, |
|
"loss": 1.4786, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000154858934169279, |
|
"loss": 1.734, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015454545454545454, |
|
"loss": 1.302, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015423197492163011, |
|
"loss": 1.3067, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00015391849529780563, |
|
"loss": 1.2758, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001536050156739812, |
|
"loss": 0.4271, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015329153605015674, |
|
"loss": 1.3511, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001529780564263323, |
|
"loss": 1.2637, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.3989219665527344, |
|
"eval_runtime": 866.2996, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015266457680250786, |
|
"loss": 1.2459, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001523510971786834, |
|
"loss": 1.1139, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015203761755485894, |
|
"loss": 1.7787, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015172413793103449, |
|
"loss": 1.8546, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00015141065830721003, |
|
"loss": 1.3567, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015109717868338557, |
|
"loss": 1.1376, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015078369905956114, |
|
"loss": 1.2866, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015047021943573669, |
|
"loss": 1.1168, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00015015673981191223, |
|
"loss": 1.3957, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001498432601880878, |
|
"loss": 1.4531, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00014952978056426332, |
|
"loss": 1.3238, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014921630094043889, |
|
"loss": 1.2166, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014890282131661443, |
|
"loss": 1.5402, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014858934169278997, |
|
"loss": 1.6967, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014827586206896554, |
|
"loss": 1.6176, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014796238244514106, |
|
"loss": 1.3383, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014764890282131663, |
|
"loss": 1.4409, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014733542319749217, |
|
"loss": 1.2145, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014702194357366771, |
|
"loss": 1.7027, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014670846394984328, |
|
"loss": 1.5586, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001463949843260188, |
|
"loss": 1.3585, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014608150470219437, |
|
"loss": 1.4356, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001457680250783699, |
|
"loss": 1.202, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 1.4999, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000145141065830721, |
|
"loss": 1.8095, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014482758620689657, |
|
"loss": 1.1945, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001445141065830721, |
|
"loss": 1.2339, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014420062695924766, |
|
"loss": 1.8472, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001438871473354232, |
|
"loss": 0.9892, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014357366771159874, |
|
"loss": 1.1517, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001432601880877743, |
|
"loss": 1.4252, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014294670846394983, |
|
"loss": 1.5793, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001426332288401254, |
|
"loss": 1.6862, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014231974921630097, |
|
"loss": 1.2111, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014200626959247648, |
|
"loss": 1.5163, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014169278996865206, |
|
"loss": 1.4963, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001413793103448276, |
|
"loss": 1.906, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014106583072100314, |
|
"loss": 1.4359, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014075235109717868, |
|
"loss": 1.4324, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014043887147335423, |
|
"loss": 1.513, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.3920646905899048, |
|
"eval_runtime": 866.473, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001401253918495298, |
|
"loss": 1.2709, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00013981191222570534, |
|
"loss": 1.4844, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00013949843260188088, |
|
"loss": 1.4832, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00013918495297805643, |
|
"loss": 1.6388, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000138871473354232, |
|
"loss": 0.7068, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001385579937304075, |
|
"loss": 1.4712, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013824451410658308, |
|
"loss": 1.4861, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013793103448275863, |
|
"loss": 1.0516, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013761755485893417, |
|
"loss": 1.7038, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00013730407523510974, |
|
"loss": 1.5841, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013699059561128526, |
|
"loss": 1.7073, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013667711598746083, |
|
"loss": 1.1921, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013636363636363637, |
|
"loss": 1.4323, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001360501567398119, |
|
"loss": 1.6943, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013573667711598748, |
|
"loss": 1.1724, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013542319749216303, |
|
"loss": 1.6194, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00013510971786833857, |
|
"loss": 1.1307, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001347962382445141, |
|
"loss": 0.9491, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00013448275862068965, |
|
"loss": 1.4941, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001341692789968652, |
|
"loss": 1.519, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00013385579937304077, |
|
"loss": 1.2309, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001335423197492163, |
|
"loss": 1.7851, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00013322884012539185, |
|
"loss": 1.2136, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001329153605015674, |
|
"loss": 1.4588, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013260188087774294, |
|
"loss": 1.1362, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001322884012539185, |
|
"loss": 1.3981, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013197492163009403, |
|
"loss": 1.417, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001316614420062696, |
|
"loss": 1.3526, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013134796238244517, |
|
"loss": 1.3844, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00013103448275862068, |
|
"loss": 1.1524, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00013072100313479625, |
|
"loss": 1.46, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001304075235109718, |
|
"loss": 1.4659, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00013009404388714734, |
|
"loss": 0.9684, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001297805642633229, |
|
"loss": 1.4079, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00012946708463949843, |
|
"loss": 1.5447, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000129153605015674, |
|
"loss": 1.4095, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012884012539184954, |
|
"loss": 1.4073, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012852664576802508, |
|
"loss": 1.2867, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012821316614420062, |
|
"loss": 1.7357, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001278996865203762, |
|
"loss": 1.4032, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.386928915977478, |
|
"eval_runtime": 866.195, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012758620689655174, |
|
"loss": 1.4204, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012727272727272728, |
|
"loss": 1.1404, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00012695924764890282, |
|
"loss": 1.7556, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00012664576802507837, |
|
"loss": 1.1598, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00012633228840125394, |
|
"loss": 1.6847, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00012601880877742945, |
|
"loss": 0.9023, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00012570532915360502, |
|
"loss": 1.7739, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001253918495297806, |
|
"loss": 1.2681, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001250783699059561, |
|
"loss": 1.5614, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00012476489028213168, |
|
"loss": 1.7507, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00012445141065830722, |
|
"loss": 1.205, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00012413793103448277, |
|
"loss": 1.0752, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001238244514106583, |
|
"loss": 1.6259, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00012351097178683385, |
|
"loss": 1.2839, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00012319749216300942, |
|
"loss": 1.5367, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00012288401253918497, |
|
"loss": 1.3161, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001225705329153605, |
|
"loss": 1.5973, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00012225705329153605, |
|
"loss": 1.3614, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00012194357366771161, |
|
"loss": 0.9446, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00012163009404388714, |
|
"loss": 1.3996, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00012131661442006271, |
|
"loss": 1.4435, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00012100313479623827, |
|
"loss": 1.059, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001206896551724138, |
|
"loss": 1.1291, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00012037617554858935, |
|
"loss": 1.4702, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001200626959247649, |
|
"loss": 1.3586, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011974921630094045, |
|
"loss": 1.6694, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011943573667711598, |
|
"loss": 1.2642, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011912225705329154, |
|
"loss": 1.1625, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001188087774294671, |
|
"loss": 1.6103, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00011849529780564264, |
|
"loss": 1.3178, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001181818181818182, |
|
"loss": 1.3575, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00011786833855799372, |
|
"loss": 1.4692, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001175548589341693, |
|
"loss": 1.7414, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00011724137931034482, |
|
"loss": 1.6938, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00011692789968652038, |
|
"loss": 1.9068, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00011661442006269594, |
|
"loss": 1.6176, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00011630094043887148, |
|
"loss": 1.5824, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00011598746081504704, |
|
"loss": 1.5442, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00011567398119122257, |
|
"loss": 1.322, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00011536050156739812, |
|
"loss": 1.6088, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.3828775882720947, |
|
"eval_runtime": 866.9525, |
|
"eval_samples_per_second": 1.159, |
|
"eval_steps_per_second": 1.159, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00011504702194357367, |
|
"loss": 1.2647, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011473354231974922, |
|
"loss": 1.6157, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011442006269592478, |
|
"loss": 1.6007, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011410658307210031, |
|
"loss": 1.7898, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011379310344827588, |
|
"loss": 1.2325, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011347962382445141, |
|
"loss": 1.2579, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011316614420062696, |
|
"loss": 1.3584, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00011285266457680251, |
|
"loss": 1.3461, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00011253918495297806, |
|
"loss": 1.1491, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00011222570532915362, |
|
"loss": 1.4842, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00011191222570532915, |
|
"loss": 1.1456, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00011159874608150471, |
|
"loss": 1.1362, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00011128526645768025, |
|
"loss": 1.591, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001109717868338558, |
|
"loss": 1.5453, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00011065830721003134, |
|
"loss": 1.6698, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001103448275862069, |
|
"loss": 1.9426, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00011003134796238246, |
|
"loss": 1.4073, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00010971786833855799, |
|
"loss": 0.9123, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00010940438871473355, |
|
"loss": 1.6831, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 1.294, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00010877742946708465, |
|
"loss": 1.5178, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001084639498432602, |
|
"loss": 1.2052, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010815047021943574, |
|
"loss": 1.1377, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010783699059561129, |
|
"loss": 1.6875, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010752351097178684, |
|
"loss": 1.1238, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010721003134796239, |
|
"loss": 1.0325, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010689655172413792, |
|
"loss": 0.9661, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010658307210031349, |
|
"loss": 1.1979, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010626959247648905, |
|
"loss": 1.2015, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010595611285266458, |
|
"loss": 1.6322, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010564263322884013, |
|
"loss": 1.3118, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010532915360501568, |
|
"loss": 1.5565, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010501567398119123, |
|
"loss": 0.8889, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010470219435736676, |
|
"loss": 0.983, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00010438871473354232, |
|
"loss": 1.3105, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00010407523510971788, |
|
"loss": 1.2417, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00010376175548589342, |
|
"loss": 1.1782, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00010344827586206898, |
|
"loss": 1.5382, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00010313479623824452, |
|
"loss": 1.3938, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00010282131661442008, |
|
"loss": 0.9485, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3797812461853027, |
|
"eval_runtime": 866.3779, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001025078369905956, |
|
"loss": 1.5318, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010219435736677116, |
|
"loss": 1.5941, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010188087774294672, |
|
"loss": 1.5669, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010156739811912226, |
|
"loss": 1.409, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010125391849529782, |
|
"loss": 1.5028, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010094043887147335, |
|
"loss": 1.2918, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001006269592476489, |
|
"loss": 1.5101, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00010031347962382445, |
|
"loss": 1.2542, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3369, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.968652037617555e-05, |
|
"loss": 1.3911, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.93730407523511e-05, |
|
"loss": 1.3189, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.905956112852665e-05, |
|
"loss": 1.1879, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.87460815047022e-05, |
|
"loss": 1.4755, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.843260188087775e-05, |
|
"loss": 0.8884, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.81191222570533e-05, |
|
"loss": 1.4322, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.780564263322885e-05, |
|
"loss": 1.2872, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.749216300940439e-05, |
|
"loss": 1.717, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.717868338557993e-05, |
|
"loss": 1.4597, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.686520376175549e-05, |
|
"loss": 1.3438, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.655172413793105e-05, |
|
"loss": 1.0354, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.623824451410659e-05, |
|
"loss": 1.8091, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.592476489028213e-05, |
|
"loss": 1.4617, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.561128526645769e-05, |
|
"loss": 1.4417, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.529780564263323e-05, |
|
"loss": 1.0994, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.498432601880878e-05, |
|
"loss": 1.1481, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.467084639498433e-05, |
|
"loss": 1.0104, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.435736677115989e-05, |
|
"loss": 1.0098, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.404388714733543e-05, |
|
"loss": 1.9093, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.373040752351098e-05, |
|
"loss": 0.9269, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.341692789968652e-05, |
|
"loss": 0.606, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.310344827586207e-05, |
|
"loss": 1.5762, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.278996865203762e-05, |
|
"loss": 1.1424, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.247648902821317e-05, |
|
"loss": 1.6305, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.216300940438872e-05, |
|
"loss": 1.1738, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.184952978056427e-05, |
|
"loss": 0.973, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.153605015673982e-05, |
|
"loss": 1.4229, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.122257053291536e-05, |
|
"loss": 1.3226, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 1.6308, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.059561128526646e-05, |
|
"loss": 1.5685, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.028213166144202e-05, |
|
"loss": 1.5146, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3761860132217407, |
|
"eval_runtime": 866.1927, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.996865203761756e-05, |
|
"loss": 1.333, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.96551724137931e-05, |
|
"loss": 1.4217, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.934169278996866e-05, |
|
"loss": 1.2831, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.90282131661442e-05, |
|
"loss": 1.4965, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.871473354231975e-05, |
|
"loss": 1.5408, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.84012539184953e-05, |
|
"loss": 1.3491, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.808777429467086e-05, |
|
"loss": 1.2511, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.77742946708464e-05, |
|
"loss": 1.7709, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.746081504702195e-05, |
|
"loss": 1.4589, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.71473354231975e-05, |
|
"loss": 1.2527, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.683385579937305e-05, |
|
"loss": 1.483, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.652037617554859e-05, |
|
"loss": 1.0561, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.620689655172413e-05, |
|
"loss": 1.52, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.58934169278997e-05, |
|
"loss": 1.6113, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.557993730407524e-05, |
|
"loss": 1.6251, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.526645768025079e-05, |
|
"loss": 1.1801, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.495297805642633e-05, |
|
"loss": 1.2712, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.463949843260189e-05, |
|
"loss": 1.3207, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.432601880877743e-05, |
|
"loss": 1.7274, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.401253918495299e-05, |
|
"loss": 0.8838, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.369905956112853e-05, |
|
"loss": 1.5105, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.338557993730409e-05, |
|
"loss": 1.4059, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.307210031347963e-05, |
|
"loss": 1.4178, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.275862068965517e-05, |
|
"loss": 1.513, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.244514106583072e-05, |
|
"loss": 0.773, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.213166144200627e-05, |
|
"loss": 1.5264, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.181818181818183e-05, |
|
"loss": 1.2271, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.150470219435737e-05, |
|
"loss": 1.8335, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.119122257053292e-05, |
|
"loss": 1.333, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.087774294670847e-05, |
|
"loss": 1.0521, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.056426332288402e-05, |
|
"loss": 1.6083, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.025078369905956e-05, |
|
"loss": 1.3099, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.993730407523512e-05, |
|
"loss": 1.3217, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.962382445141067e-05, |
|
"loss": 1.5504, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.931034482758621e-05, |
|
"loss": 1.2699, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.899686520376176e-05, |
|
"loss": 0.8664, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.86833855799373e-05, |
|
"loss": 1.6929, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.836990595611286e-05, |
|
"loss": 1.2003, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.80564263322884e-05, |
|
"loss": 1.6811, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.774294670846394e-05, |
|
"loss": 1.6352, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.3731473684310913, |
|
"eval_runtime": 866.2054, |
|
"eval_samples_per_second": 1.16, |
|
"eval_steps_per_second": 1.16, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 648, |
|
"num_train_epochs": 1, |
|
"save_steps": 40, |
|
"total_flos": 1.9141431690264576e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|