{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.969227041434456,
  "eval_steps": 100,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 1.875e-05, "loss": 2.5575, "step": 1 },
    { "epoch": 0.0, "learning_rate": 3.75e-05, "loss": 2.5088, "step": 2 },
    { "epoch": 0.01, "learning_rate": 5.625e-05, "loss": 2.5653, "step": 3 },
    { "epoch": 0.01, "learning_rate": 7.5e-05, "loss": 2.5625, "step": 4 },
    { "epoch": 0.01, "learning_rate": 9.374999999999999e-05, "loss": 2.4798, "step": 5 },
    { "epoch": 0.01, "learning_rate": 0.0001125, "loss": 2.5331, "step": 6 },
    { "epoch": 0.01, "learning_rate": 0.00013125, "loss": 2.6329, "step": 7 },
    { "epoch": 0.02, "learning_rate": 0.00015, "loss": 2.5976, "step": 8 },
    { "epoch": 0.02, "learning_rate": 0.00016874999999999998, "loss": 2.6081, "step": 9 },
    { "epoch": 0.02, "learning_rate": 0.00018749999999999998, "loss": 2.6396, "step": 10 },
    { "epoch": 0.02, "learning_rate": 0.00020624999999999997, "loss": 2.5221, "step": 11 },
    { "epoch": 0.02, "learning_rate": 0.000225, "loss": 2.6252, "step": 12 },
    { "epoch": 0.03, "learning_rate": 0.00024375, "loss": 2.5929, "step": 13 },
    { "epoch": 0.03, "learning_rate": 0.0002625, "loss": 2.5922, "step": 14 },
    { "epoch": 0.03, "learning_rate": 0.00028125, "loss": 2.4996, "step": 15 },
    { "epoch": 0.03, "learning_rate": 0.0003, "loss": 2.5948, "step": 16 },
    { "epoch": 0.03, "learning_rate": 0.000299997027249348, "loss": 2.5224, "step": 17 },
    { "epoch": 0.03, "learning_rate": 0.00029998810911522207, "loss": 2.5954, "step": 18 },
    { "epoch": 0.04, "learning_rate": 0.0002999732459511074, "loss": 2.5453, "step": 19 },
    { "epoch": 0.04, "learning_rate": 0.00029995243834613037, "loss": 2.6684, "step": 20 },
    { "epoch": 0.04, "learning_rate": 0.0002999256871250353, "loss": 2.5159, "step": 21 },
    { "epoch": 0.04, "learning_rate": 0.0002998929933481515, "loss": 2.6213, "step": 22 },
    { "epoch": 0.04, "learning_rate": 0.0002998543583113518, "loss": 2.6888, "step": 23 },
    { "epoch": 0.05, "learning_rate": 0.00029980978354600055, "loss": 2.4422, "step": 24 },
    { "epoch": 0.05, "learning_rate": 0.00029975927081889316, "loss": 2.5714, "step": 25 },
    { "epoch": 0.05, "learning_rate": 0.0002997028221321863, "loss": 2.6195, "step": 26 },
    { "epoch": 0.05, "learning_rate": 0.0002996404397233182, "loss": 2.6447, "step": 27 },
    { "epoch": 0.05, "learning_rate": 0.00029957212606492007, "loss": 2.5583, "step": 28 },
    { "epoch": 0.06, "learning_rate": 0.00029949788386471836, "loss": 2.5892, "step": 29 },
    { "epoch": 0.06, "learning_rate": 0.00029941771606542696, "loss": 2.7688, "step": 30 },
    { "epoch": 0.06, "learning_rate": 0.00029933162584463096, "loss": 2.6723, "step": 31 },
    { "epoch": 0.06, "learning_rate": 0.00029923961661466045, "loss": 2.7775, "step": 32 },
    { "epoch": 0.06, "learning_rate": 0.0002991416920224554, "loss": 2.6753, "step": 33 },
    { "epoch": 0.07, "learning_rate": 0.0002990378559494212, "loss": 2.7772, "step": 34 },
    { "epoch": 0.07, "learning_rate": 0.0002989281125112744, "loss": 2.8511, "step": 35 },
    { "epoch": 0.07, "learning_rate": 0.0002988124660578801, "loss": 2.8939, "step": 36 },
    { "epoch": 0.07, "learning_rate": 0.0002986909211730792, "loss": 2.8698, "step": 37 },
    { "epoch": 0.07, "learning_rate": 0.0002985634826745069, "loss": 3.0635, "step": 38 },
    { "epoch": 0.08, "learning_rate": 0.0002984301556134016, "loss": 3.0185, "step": 39 },
    { "epoch": 0.08, "learning_rate": 0.0002982909452744047, "loss": 3.0233, "step": 40 },
    { "epoch": 0.08, "learning_rate": 0.0002981458571753512, "loss": 3.1451, "step": 41 },
    { "epoch": 0.08, "learning_rate": 0.000297994897067051, "loss": 3.2266, "step": 42 },
    { "epoch": 0.08, "learning_rate": 0.0002978380709330609, "loss": 3.3143, "step": 43 },
    { "epoch": 0.09, "learning_rate": 0.0002976753849894475, "loss": 3.3626, "step": 44 },
    { "epoch": 0.09, "learning_rate": 0.00029750684568454063, "loss": 3.3557, "step": 45 },
    { "epoch": 0.09, "learning_rate": 0.00029733245969867814, "loss": 3.486, "step": 46 },
    { "epoch": 0.09, "learning_rate": 0.0002971522339439407, "loss": 3.4997, "step": 47 },
    { "epoch": 0.09, "learning_rate": 0.0002969661755638779, "loss": 3.6059, "step": 48 },
    { "epoch": 0.09, "learning_rate": 0.00029677429193322556, "loss": 3.7884, "step": 49 },
    { "epoch": 0.1, "learning_rate": 0.00029657659065761267, "loss": 3.9539, "step": 50 },
    { "epoch": 0.1, "learning_rate": 0.0002963730795732607, "loss": 4.0443, "step": 51 },
    { "epoch": 0.1, "learning_rate": 0.00029616376674667223, "loss": 4.2825, "step": 52 },
    { "epoch": 0.1, "learning_rate": 0.000295948660474312, "loss": 4.476, "step": 53 },
    { "epoch": 0.1, "learning_rate": 0.0002957277692822774, "loss": 4.6866, "step": 54 },
    { "epoch": 0.11, "learning_rate": 0.0002955011019259608, "loss": 5.0977, "step": 55 },
    { "epoch": 0.11, "learning_rate": 0.00029526866738970286, "loss": 5.2591, "step": 56 },
    { "epoch": 0.11, "learning_rate": 0.0002950304748864356, "loss": 5.6691, "step": 57 },
    { "epoch": 0.11, "learning_rate": 0.00029478653385731817, "loss": 5.87, "step": 58 },
    { "epoch": 0.11, "learning_rate": 0.0002945368539713617, "loss": 6.4257, "step": 59 },
    { "epoch": 0.12, "learning_rate": 0.000294281445125047, "loss": 6.4306, "step": 60 },
    { "epoch": 0.12, "learning_rate": 0.0002940203174419314, "loss": 6.9147, "step": 61 },
    { "epoch": 0.12, "learning_rate": 0.0002937534812722483, "loss": 7.2439, "step": 62 },
    { "epoch": 0.12, "learning_rate": 0.00029348094719249614, "loss": 7.2032, "step": 63 },
    { "epoch": 0.12, "learning_rate": 0.00029320272600501983, "loss": 7.6343, "step": 64 },
    { "epoch": 0.13, "learning_rate": 0.0002929188287375824, "loss": 7.7117, "step": 65 },
    { "epoch": 0.13, "learning_rate": 0.00029262926664292744, "loss": 7.7917, "step": 66 },
    { "epoch": 0.13, "learning_rate": 0.00029233405119833387, "loss": 8.1056, "step": 67 },
    { "epoch": 0.13, "learning_rate": 0.0002920331941051603, "loss": 8.2865, "step": 68 },
    { "epoch": 0.13, "learning_rate": 0.0002917267072883818, "loss": 8.6984, "step": 69 },
    { "epoch": 0.14, "learning_rate": 0.0002914146028961167, "loss": 8.9353, "step": 70 },
    { "epoch": 0.14, "learning_rate": 0.0002910968932991455, "loss": 8.8638, "step": 71 },
    { "epoch": 0.14, "learning_rate": 0.0002907735910904205, "loss": 9.0658, "step": 72 },
    { "epoch": 0.14, "learning_rate": 0.0002904447090845662, "loss": 9.4505, "step": 73 },
    { "epoch": 0.14, "learning_rate": 0.00029011026031737193, "loss": 9.4891, "step": 74 },
    { "epoch": 0.15, "learning_rate": 0.000289770258045275, "loss": 9.4665, "step": 75 },
    { "epoch": 0.15, "learning_rate": 0.00028942471574483497, "loss": 10.0014, "step": 76 },
    { "epoch": 0.15, "learning_rate": 0.00028907364711219997, "loss": 9.5228, "step": 77 },
    { "epoch": 0.15, "learning_rate": 0.00028871706606256326, "loss": 9.6988, "step": 78 },
    { "epoch": 0.15, "learning_rate": 0.00028835498672961224, "loss": 9.8295, "step": 79 },
    { "epoch": 0.16, "learning_rate": 0.0002879874234649679, "loss": 9.302, "step": 80 },
    { "epoch": 0.16, "learning_rate": 0.00028761439083761596, "loss": 8.9286, "step": 81 },
    { "epoch": 0.16, "learning_rate": 0.0002872359036333296, "loss": 8.762, "step": 82 },
    { "epoch": 0.16, "learning_rate": 0.0002868519768540833, "loss": 8.4371, "step": 83 },
    { "epoch": 0.16, "learning_rate": 0.0002864626257174581, "loss": 8.2755, "step": 84 },
    { "epoch": 0.16, "learning_rate": 0.00028606786565603875, "loss": 8.1114, "step": 85 },
    { "epoch": 0.17, "learning_rate": 0.0002856677123168015, "loss": 7.8714, "step": 86 },
    { "epoch": 0.17, "learning_rate": 0.00028526218156049433, "loss": 7.7255, "step": 87 },
    { "epoch": 0.17, "learning_rate": 0.0002848512894610083, "loss": 7.6783, "step": 88 },
    { "epoch": 0.17, "learning_rate": 0.00028443505230474006, "loss": 7.342, "step": 89 },
    { "epoch": 0.17, "learning_rate": 0.0002840134865899468, "loss": 7.2997, "step": 90 },
    { "epoch": 0.18, "learning_rate": 0.00028358660902609166, "loss": 7.2757, "step": 91 },
    { "epoch": 0.18, "learning_rate": 0.00028315443653318225, "loss": 7.2005, "step": 92 },
    { "epoch": 0.18, "learning_rate": 0.00028271698624109933, "loss": 7.0789, "step": 93 },
    { "epoch": 0.18, "learning_rate": 0.00028227427548891803, "loss": 7.043, "step": 94 },
    { "epoch": 0.18, "learning_rate": 0.0002818263218242208, "loss": 7.1089, "step": 95 },
    { "epoch": 0.19, "learning_rate": 0.00028137314300240166, "loss": 6.9611, "step": 96 },
    { "epoch": 0.19, "learning_rate": 0.00028091475698596236, "loss": 6.8566, "step": 97 },
    { "epoch": 0.19, "learning_rate": 0.0002804511819438006, "loss": 7.0051, "step": 98 },
    { "epoch": 0.19, "learning_rate": 0.0002799824362504899, "loss": 6.8284, "step": 99 },
    { "epoch": 0.19, "learning_rate": 0.00027950853848555116, "loss": 6.7849, "step": 100 },
    { "epoch": 0.19, "eval_loss": 6.93548059463501, "eval_runtime": 0.4634, "eval_samples_per_second": 144.584, "eval_steps_per_second": 10.79, "step": 100 },
    { "epoch": 0.2, "learning_rate": 0.0002790295074327162, "loss": 6.9271, "step": 101 },
    { "epoch": 0.2, "learning_rate": 0.00027854536207918336, "loss": 6.7996, "step": 102 },
    { "epoch": 0.2, "learning_rate": 0.00027805612161486477, "loss": 6.7169, "step": 103 },
    { "epoch": 0.2, "learning_rate": 0.00027756180543162597, "loss": 6.7617, "step": 104 },
    { "epoch": 0.2, "learning_rate": 0.000277062433122517, "loss": 6.6488, "step": 105 },
    { "epoch": 0.21, "learning_rate": 0.000276558024480996, "loss": 6.7194, "step": 106 },
    { "epoch": 0.21, "learning_rate": 0.00027604859950014455, "loss": 6.5303, "step": 107 },
    { "epoch": 0.21, "learning_rate": 0.0002755341783718752, "loss": 6.6088, "step": 108 },
    { "epoch": 0.21, "learning_rate": 0.00027501478148613114, "loss": 6.5223, "step": 109 },
    { "epoch": 0.21, "learning_rate": 0.0002744904294300782, "loss": 6.5884, "step": 110 },
    { "epoch": 0.22, "learning_rate": 0.00027396114298728865, "loss": 6.4799, "step": 111 },
    { "epoch": 0.22, "learning_rate": 0.0002734269431369173, "loss": 6.5031, "step": 112 },
    { "epoch": 0.22, "learning_rate": 0.00027288785105287024, "loss": 6.483, "step": 113 },
    { "epoch": 0.22, "learning_rate": 0.0002723438881029654, "loss": 6.517, "step": 114 },
    { "epoch": 0.22, "learning_rate": 0.00027179507584808554, "loss": 6.4037, "step": 115 },
    { "epoch": 0.22, "learning_rate": 0.000271241436041324, "loss": 6.3645, "step": 116 },
    { "epoch": 0.23, "learning_rate": 0.00027068299062712195, "loss": 6.386, "step": 117 },
    { "epoch": 0.23, "learning_rate": 0.00027011976174039904, "loss": 6.408, "step": 118 },
    { "epoch": 0.23, "learning_rate": 0.0002695517717056757, "loss": 6.5254, "step": 119 },
    { "epoch": 0.23, "learning_rate": 0.0002689790430361887, "loss": 6.2899, "step": 120 },
    { "epoch": 0.23, "learning_rate": 0.0002684015984329983, "loss": 6.3701, "step": 121 },
    { "epoch": 0.24, "learning_rate": 0.00026781946078408876, "loss": 6.3711, "step": 122 },
    { "epoch": 0.24, "learning_rate": 0.00026723265316346104, "loss": 6.4739, "step": 123 },
    { "epoch": 0.24, "learning_rate": 0.00026664119883021843, "loss": 6.4259, "step": 124 },
    { "epoch": 0.24, "learning_rate": 0.00026604512122764426, "loss": 6.3035, "step": 125 },
    { "epoch": 0.24, "learning_rate": 0.0002654444439822729, "loss": 6.26, "step": 126 },
    { "epoch": 0.25, "learning_rate": 0.0002648391909029534, "loss": 6.3777, "step": 127 },
    { "epoch": 0.25, "learning_rate": 0.00026422938597990553, "loss": 6.3888, "step": 128 },
    { "epoch": 0.25, "learning_rate": 0.0002636150533837691, "loss": 6.1202, "step": 129 },
    { "epoch": 0.25, "learning_rate": 0.0002629962174646457, "loss": 6.2316, "step": 130 },
    { "epoch": 0.25, "learning_rate": 0.00026237290275113386, "loss": 6.1776, "step": 131 },
    { "epoch": 0.26, "learning_rate": 0.00026174513394935646, "loss": 6.1823, "step": 132 },
    { "epoch": 0.26, "learning_rate": 0.0002611129359419817, "loss": 6.1094, "step": 133 },
    { "epoch": 0.26, "learning_rate": 0.00026047633378723683, "loss": 6.076, "step": 134 },
    { "epoch": 0.26, "learning_rate": 0.0002598353527179147, "loss": 6.0366, "step": 135 },
    { "epoch": 0.26, "learning_rate": 0.00025919001814037393, "loss": 6.0301, "step": 136 },
    { "epoch": 0.27, "learning_rate": 0.00025854035563353166, "loss": 6.1283, "step": 137 },
    { "epoch": 0.27, "learning_rate": 0.0002578863909478497, "loss": 5.8362, "step": 138 },
    { "epoch": 0.27, "learning_rate": 0.00025722815000431406, "loss": 5.8631, "step": 139 },
    { "epoch": 0.27, "learning_rate": 0.0002565656588934073, "loss": 5.8031, "step": 140 },
    { "epoch": 0.27, "learning_rate": 0.0002558989438740745, "loss": 5.7674, "step": 141 },
    { "epoch": 0.28, "learning_rate": 0.00025522803137268253, "loss": 5.7825, "step": 142 },
    { "epoch": 0.28, "learning_rate": 0.0002545529479819723, "loss": 5.8328, "step": 143 },
    { "epoch": 0.28, "learning_rate": 0.000253873720460005, "loss": 5.8554, "step": 144 },
    { "epoch": 0.28, "learning_rate": 0.0002531903757291015, "loss": 5.8429, "step": 145 },
    { "epoch": 0.28, "learning_rate": 0.000252502940874775, "loss": 5.7095, "step": 146 },
    { "epoch": 0.28, "learning_rate": 0.00025181144314465764, "loss": 5.5604, "step": 147 },
    { "epoch": 0.29, "learning_rate": 0.0002511159099474205, "loss": 5.5383, "step": 148 },
    { "epoch": 0.29, "learning_rate": 0.00025041636885168715, "loss": 5.4359, "step": 149 },
    { "epoch": 0.29, "learning_rate": 0.0002497128475849408, "loss": 5.2092, "step": 150 },
    { "epoch": 0.29, "learning_rate": 0.0002490053740324256, "loss": 5.2694, "step": 151 },
    { "epoch": 0.29, "learning_rate": 0.000248293976236041, "loss": 5.1733, "step": 152 },
    { "epoch": 0.3, "learning_rate": 0.0002475786823932306, "loss": 4.9904, "step": 153 },
    { "epoch": 0.3, "learning_rate": 0.0002468595208558641, "loss": 5.0725, "step": 154 },
    { "epoch": 0.3, "learning_rate": 0.00024613652012911403, "loss": 5.1754, "step": 155 },
    { "epoch": 0.3, "learning_rate": 0.00024540970887032543, "loss": 4.8482, "step": 156 },
    { "epoch": 0.3, "learning_rate": 0.00024467911588788016, "loss": 5.0397, "step": 157 },
    { "epoch": 0.31, "learning_rate": 0.00024394477014005514, "loss": 5.0625, "step": 158 },
    { "epoch": 0.31, "learning_rate": 0.0002432067007338744, "loss": 5.1172, "step": 159 },
    { "epoch": 0.31, "learning_rate": 0.0002424649369239553, "loss": 5.135, "step": 160 },
    { "epoch": 0.31, "learning_rate": 0.00024171950811134927, "loss": 4.9951, "step": 161 },
    { "epoch": 0.31, "learning_rate": 0.00024097044384237607, "loss": 4.9701, "step": 162 },
    { "epoch": 0.32, "learning_rate": 0.000240217773807453, "loss": 4.9604, "step": 163 },
    { "epoch": 0.32, "learning_rate": 0.00023946152783991786, "loss": 5.0689, "step": 164 },
    { "epoch": 0.32, "learning_rate": 0.0002387017359148466, "loss": 4.9858, "step": 165 },
    { "epoch": 0.32, "learning_rate": 0.00023793842814786505, "loss": 5.0683, "step": 166 },
    { "epoch": 0.32, "learning_rate": 0.00023717163479395538, "loss": 5.0672, "step": 167 },
    { "epoch": 0.33, "learning_rate": 0.00023640138624625684, "loss": 4.9967, "step": 168 },
    { "epoch": 0.33, "learning_rate": 0.00023562771303486108, "loss": 5.1249, "step": 169 },
    { "epoch": 0.33, "learning_rate": 0.00023485064582560197, "loss": 5.1674, "step": 170 },
    { "epoch": 0.33, "learning_rate": 0.00023407021541884025, "loss": 4.9008, "step": 171 },
    { "epoch": 0.33, "learning_rate": 0.00023328645274824254, "loss": 5.1389, "step": 172 },
    { "epoch": 0.34, "learning_rate": 0.00023249938887955543, "loss": 4.9922, "step": 173 },
    { "epoch": 0.34, "learning_rate": 0.00023170905500937396, "loss": 5.0586, "step": 174 },
    { "epoch": 0.34, "learning_rate": 0.0002309154824639052, "loss": 5.1404, "step": 175 },
    { "epoch": 0.34, "learning_rate": 0.00023011870269772642, "loss": 5.1627, "step": 176 },
    { "epoch": 0.34, "learning_rate": 0.00022931874729253856, "loss": 5.0564, "step": 177 },
    { "epoch": 0.35, "learning_rate": 0.00022851564795591442, "loss": 5.22, "step": 178 },
    { "epoch": 0.35, "learning_rate": 0.0002277094365200416, "loss": 5.0221, "step": 179 },
    { "epoch": 0.35, "learning_rate": 0.00022690014494046104, "loss": 5.0044, "step": 180 },
    { "epoch": 0.35, "learning_rate": 0.0002260878052948004, "loss": 5.1381, "step": 181 },
    { "epoch": 0.35, "learning_rate": 0.00022527244978150248, "loss": 4.931, "step": 182 },
    { "epoch": 0.35, "learning_rate": 0.0002244541107185491, "loss": 5.0278, "step": 183 },
    { "epoch": 0.36, "learning_rate": 0.00022363282054217994, "loss": 5.0771, "step": 184 },
    { "epoch": 0.36, "learning_rate": 0.0002228086118056072, "loss": 5.1335, "step": 185 },
    { "epoch": 0.36, "learning_rate": 0.00022198151717772494, "loss": 5.0262, "step": 186 },
    { "epoch": 0.36, "learning_rate": 0.00022115156944181442, "loss": 5.1367, "step": 187 },
    { "epoch": 0.36, "learning_rate": 0.00022031880149424462, "loss": 5.0092, "step": 188 },
    { "epoch": 0.37, "learning_rate": 0.00021948324634316833, "loss": 4.9525, "step": 189 },
    { "epoch": 0.37, "learning_rate": 0.00021864493710721384, "loss": 5.1732, "step": 190 },
    { "epoch": 0.37, "learning_rate": 0.00021780390701417216, "loss": 4.944, "step": 191 },
    { "epoch": 0.37, "learning_rate": 0.00021696018939968, "loss": 4.8821, "step": 192 },
    { "epoch": 0.37, "learning_rate": 0.00021611381770589866, "loss": 4.9632, "step": 193 },
    { "epoch": 0.38, "learning_rate": 0.00021526482548018814, "loss": 4.8977, "step": 194 },
    { "epoch": 0.38, "learning_rate": 0.00021441324637377768, "loss": 4.9198, "step": 195 },
    { "epoch": 0.38, "learning_rate": 0.00021355911414043185, "loss": 4.8276, "step": 196 },
    { "epoch": 0.38, "learning_rate": 0.00021270246263511273, "loss": 4.8213, "step": 197 },
    { "epoch": 0.38, "learning_rate": 0.00021184332581263785, "loss": 4.8753, "step": 198 },
    { "epoch": 0.39, "learning_rate": 0.00021098173772633462, "loss": 4.9393, "step": 199 },
    { "epoch": 0.39, "learning_rate": 0.00021011773252669027, "loss": 5.0001, "step": 200 },
    { "epoch": 0.39, "eval_loss": 5.125741481781006, "eval_runtime": 0.4595, "eval_samples_per_second": 145.823, "eval_steps_per_second": 10.882, "step": 200 },
    { "epoch": 0.39, "learning_rate": 0.00020925134445999843, "loss": 5.0326, "step": 201 },
    { "epoch": 0.39, "learning_rate": 0.0002083826078670016, "loss": 4.9906, "step": 202 },
    { "epoch": 0.39, "learning_rate": 0.00020751155718153012, "loss": 4.9179, "step": 203 },
    { "epoch": 0.4, "learning_rate": 0.00020663822692913722, "loss": 5.0525, "step": 204 },
    { "epoch": 0.4, "learning_rate": 0.0002057626517257306, "loss": 5.191, "step": 205 },
    { "epoch": 0.4, "learning_rate": 0.00020488486627620036, "loss": 5.0969, "step": 206 },
    { "epoch": 0.4, "learning_rate": 0.00020400490537304336, "loss": 5.1378, "step": 207 },
    { "epoch": 0.4, "learning_rate": 0.0002031228038949843, "loss": 5.0901, "step": 208 },
    { "epoch": 0.41, "learning_rate": 0.00020223859680559305, "loss": 5.2614, "step": 209 },
    { "epoch": 0.41, "learning_rate": 0.00020135231915189897, "loss": 5.3052, "step": 210 },
    { "epoch": 0.41, "learning_rate": 0.00020046400606300177, "loss": 5.2046, "step": 211 },
    { "epoch": 0.41, "learning_rate": 0.0001995736927486789, "loss": 5.1249, "step": 212 },
    { "epoch": 0.41, "learning_rate": 0.00019868141449799016, "loss": 5.3504, "step": 213 },
    { "epoch": 0.41, "learning_rate": 0.00019778720667787894, "loss": 5.296, "step": 214 },
    { "epoch": 0.42, "learning_rate": 0.0001968911047317703, "loss": 5.3431, "step": 215 },
    { "epoch": 0.42, "learning_rate": 0.00019599314417816617, "loss": 5.3026, "step": 216 },
    { "epoch": 0.42, "learning_rate": 0.00019509336060923748, "loss": 5.1208, "step": 217 },
    { "epoch": 0.42, "learning_rate": 0.00019419178968941344, "loss": 5.2991, "step": 218 },
    { "epoch": 0.42, "learning_rate": 0.00019328846715396797, "loss": 5.4355, "step": 219 },
    { "epoch": 0.43, "learning_rate": 0.00019238342880760305, "loss": 5.3463, "step": 220 },
    { "epoch": 0.43, "learning_rate": 0.00019147671052302992, "loss": 5.381, "step": 221 },
    { "epoch": 0.43, "learning_rate": 0.00019056834823954683, "loss": 5.2602, "step": 222 },
    { "epoch": 0.43, "learning_rate": 0.00018965837796161464, "loss": 5.6047, "step": 223 },
    { "epoch": 0.43, "learning_rate": 0.00018874683575742995, "loss": 5.4004, "step": 224 },
    { "epoch": 0.44, "learning_rate": 0.0001878337577574951, "loss": 5.3746, "step": 225 },
    { "epoch": 0.44, "learning_rate": 0.00018691918015318644, "loss": 5.5042, "step": 226 },
    { "epoch": 0.44, "learning_rate": 0.0001860031391953195, "loss": 5.6208, "step": 227 },
    { "epoch": 0.44, "learning_rate": 0.00018508567119271237, "loss": 5.6673, "step": 228 },
    { "epoch": 0.44, "learning_rate": 0.00018416681251074633, "loss": 5.6802, "step": 229 },
    { "epoch": 0.45, "learning_rate": 0.0001832465995699248, "loss": 5.6571, "step": 230 },
    { "epoch": 0.45, "learning_rate": 0.00018232506884442932, "loss": 5.5854, "step": 231 },
    { "epoch": 0.45, "learning_rate": 0.00018140225686067403, "loss": 5.6822, "step": 232 },
    { "epoch": 0.45, "learning_rate": 0.00018047820019585805, "loss": 5.6988, "step": 233 },
    { "epoch": 0.45, "learning_rate": 0.00017955293547651535, "loss": 5.6732, "step": 234 },
    { "epoch": 0.46, "learning_rate": 0.00017862649937706323, "loss": 5.628, "step": 235 },
    { "epoch": 0.46, "learning_rate": 0.00017769892861834867, "loss": 5.6503, "step": 236 },
    { "epoch": 0.46, "learning_rate": 0.00017677025996619265, "loss": 5.7701, "step": 237 },
    { "epoch": 0.46, "learning_rate": 0.000175840530229933, "loss": 5.9086, "step": 238 },
    { "epoch": 0.46, "learning_rate": 0.00017490977626096558, "loss": 5.8063, "step": 239 },
    { "epoch": 0.47, "learning_rate": 0.00017397803495128322, "loss": 5.7099, "step": 240 },
    { "epoch": 0.47, "learning_rate": 0.0001730453432320137, "loss": 5.8545, "step": 241 },
    { "epoch": 0.47, "learning_rate": 0.000172111738071956, "loss": 5.7577, "step": 242 },
    { "epoch": 0.47, "learning_rate": 0.00017117725647611468, "loss": 5.7155, "step": 243 },
    { "epoch": 0.47, "learning_rate": 0.0001702419354842334, "loss": 5.614, "step": 244 },
    { "epoch": 0.47, "learning_rate": 0.0001693058121693267, "loss": 5.8233, "step": 245 },
    { "epoch": 0.48, "learning_rate": 0.00016836892363621052, "loss": 5.8977, "step": 246 },
    { "epoch": 0.48, "learning_rate": 0.00016743130702003147, "loss": 5.7225, "step": 247 },
    { "epoch": 0.48, "learning_rate": 0.00016649299948479494, "loss": 5.6069, "step": 248 },
    { "epoch": 0.48, "learning_rate": 0.00016555403822189214, "loss": 5.7829, "step": 249 },
    { "epoch": 0.48, "learning_rate": 0.00016461446044862584, "loss": 5.765, "step": 250 },
    { "epoch": 0.49, "learning_rate": 0.00016367430340673514, "loss": 5.7193, "step": 251 },
    { "epoch": 0.49, "learning_rate": 0.0001627336043609196, "loss": 5.5909, "step": 252 },
    { "epoch": 0.49, "learning_rate": 0.00016179240059736183, "loss": 5.7002, "step": 253 },
    { "epoch": 0.49, "learning_rate": 0.00016085072942224985, "loss": 5.7926, "step": 254 },
    { "epoch": 0.49, "learning_rate": 0.00015990862816029836, "loss": 5.763, "step": 255 },
    { "epoch": 0.5, "learning_rate": 0.0001589661341532692, "loss": 5.678, "step": 256 },
    { "epoch": 0.5, "learning_rate": 0.00015802328475849142, "loss": 5.6742, "step": 257 },
    { "epoch": 0.5, "learning_rate": 0.00015708011734738033, "loss": 5.7914, "step": 258 },
    { "epoch": 0.5, "learning_rate": 0.00015613666930395644, "loss": 5.6897, "step": 259 },
    { "epoch": 0.5, "learning_rate": 0.00015519297802336354, "loss": 5.7609, "step": 260 },
    { "epoch": 0.51, "learning_rate": 0.0001542490809103866, "loss": 5.5896, "step": 261 },
    { "epoch": 0.51, "learning_rate": 0.00015330501537796906, "loss": 5.7305, "step": 262 },
    { "epoch": 0.51, "learning_rate": 0.00015236081884572984, "loss": 5.6916, "step": 263 },
    { "epoch": 0.51, "learning_rate": 0.00015141652873848054, "loss": 5.8479, "step": 264 },
    { "epoch": 0.51, "learning_rate": 0.00015047218248474148, "loss": 5.6922, "step": 265 },
    { "epoch": 0.52, "learning_rate": 0.00014952781751525855, "loss": 5.7687, "step": 266 },
    { "epoch": 0.52, "learning_rate": 0.00014858347126151948, "loss": 5.939, "step": 267 },
    { "epoch": 0.52, "learning_rate": 0.00014763918115427013, "loss": 5.7656, "step": 268 },
    { "epoch": 0.52, "learning_rate": 0.000146694984622031, "loss": 5.6402, "step": 269 },
    { "epoch": 0.52, "learning_rate": 0.0001457509190896134, "loss": 5.745, "step": 270 },
    { "epoch": 0.53, "learning_rate": 0.0001448070219766365, "loss": 5.7993, "step": 271 },
    { "epoch": 0.53, "learning_rate": 0.0001438633306960436, "loss": 5.9042, "step": 272 },
    { "epoch": 0.53, "learning_rate": 0.0001429198826526197, "loss": 5.9073, "step": 273 },
    { "epoch": 0.53, "learning_rate": 0.0001419767152415086, "loss": 5.9313, "step": 274 },
    { "epoch": 0.53, "learning_rate": 0.00014103386584673078, "loss": 5.9989, "step": 275 },
    { "epoch": 0.54, "learning_rate": 0.00014009137183970167, "loss": 6.0108, "step": 276 },
    { "epoch": 0.54, "learning_rate": 0.00013914927057775018, "loss": 5.9416, "step": 277 },
    { "epoch": 0.54, "learning_rate": 0.0001382075994026382, "loss": 5.8839, "step": 278 },
    { "epoch": 0.54, "learning_rate": 0.0001372663956390804, "loss": 6.0596, "step": 279 },
    { "epoch": 0.54, "learning_rate": 0.00013632569659326486, "loss": 6.1034, "step": 280 },
    { "epoch": 0.54, "learning_rate": 0.00013538553955137414, "loss": 6.1142, "step": 281 },
    { "epoch": 0.55, "learning_rate": 0.00013444596177810783, "loss": 6.178, "step": 282 },
    { "epoch": 0.55, "learning_rate": 0.00013350700051520506, "loss": 6.0406, "step": 283 },
    { "epoch": 0.55, "learning_rate": 0.00013256869297996853, "loss": 6.4111, "step": 284 },
    { "epoch": 0.55, "learning_rate": 0.00013163107636378945, "loss": 6.1454, "step": 285 },
    { "epoch": 0.55, "learning_rate": 0.00013069418783067326, "loss": 6.4537, "step": 286 },
    { "epoch": 0.56, "learning_rate": 0.0001297580645157666, "loss": 6.4003, "step": 287 },
    { "epoch": 0.56, "learning_rate": 0.0001288227435238853, "loss": 6.3845, "step": 288 },
    { "epoch": 0.56, "learning_rate": 0.000127888261928044, "loss": 6.4016, "step": 289 },
    { "epoch": 0.56, "learning_rate": 0.00012695465676798627, "loss": 6.5177, "step": 290 },
    { "epoch": 0.56, "learning_rate": 0.00012602196504871678, "loss": 6.4294, "step": 291 },
    { "epoch": 0.57, "learning_rate": 0.00012509022373903442, "loss": 6.6357, "step": 292 },
    { "epoch": 0.57, "learning_rate": 0.00012415946977006696, "loss": 6.7642, "step": 293 },
    { "epoch": 0.57, "learning_rate": 0.00012322974003380735, "loss": 6.9096, "step": 294 },
    { "epoch": 0.57, "learning_rate": 0.00012230107138165133, "loss": 6.7728, "step": 295 },
    { "epoch": 0.57, "learning_rate": 0.00012137350062293677, "loss": 6.9269, "step": 296 },
    { "epoch": 0.58, "learning_rate": 0.00012044706452348465, "loss": 6.9235, "step": 297 },
    { "epoch": 0.58, "learning_rate": 0.00011952179980414195, "loss": 6.9825, "step": 298 },
    { "epoch": 0.58, "learning_rate": 0.00011859774313932597, "loss": 7.2581, "step": 299 },
    { "epoch": 0.58, "learning_rate": 0.0001176749311555707, "loss": 7.0916, "step": 300 },
    { "epoch": 0.58, "eval_loss": 7.345704078674316, "eval_runtime": 0.4561, "eval_samples_per_second": 146.905, "eval_steps_per_second": 10.963, "step": 300 },
    { "epoch": 0.58, "learning_rate": 0.00011675340043007519, "loss": 7.2218, "step": 301 },
    { "epoch": 0.59, "learning_rate": 0.00011583318748925367, "loss": 7.1207, "step": 302 },
    { "epoch": 0.59, "learning_rate": 0.00011491432880728765, "loss": 7.3706, "step": 303 },
    { "epoch": 0.59, "learning_rate": 0.0001139968608046805, "loss": 7.3684, "step": 304 },
    { "epoch": 0.59, "learning_rate": 0.00011308081984681356, "loss": 7.2527, "step": 305 },
    { "epoch": 0.59, "learning_rate": 0.00011216624224250487, "loss": 7.281, "step": 306 },
    { "epoch": 0.6, "learning_rate": 0.00011125316424257002, "loss": 7.3978, "step": 307 },
    { "epoch": 0.6, "learning_rate": 0.00011034162203838534, "loss": 7.3763, "step": 308 },
    { "epoch": 0.6, "learning_rate": 0.00010943165176045317, "loss": 7.3376, "step": 309 },
    { "epoch": 0.6, "learning_rate": 0.00010852328947697004, "loss": 7.2407, "step": 310 },
    { "epoch": 0.6, "learning_rate": 0.0001076165711923969, "loss": 7.2541, "step": 311 },
    { "epoch": 0.6, "learning_rate": 0.00010671153284603203, "loss": 7.4064, "step": 312 },
    { "epoch": 0.61, "learning_rate": 0.0001058082103105865, "loss": 7.2814, "step": 313 },
    { "epoch": 0.61, "learning_rate": 0.0001049066393907625, "loss": 7.2034, "step": 314 },
    { "epoch": 0.61, "learning_rate": 0.00010400685582183382, "loss": 7.2216, "step": 315 },
    { "epoch": 0.61, "learning_rate": 0.00010310889526822966, "loss": 7.2839, "step": 316 },
    { "epoch": 0.61, "learning_rate": 0.00010221279332212101, "loss": 7.151, "step": 317 },
    { "epoch": 0.62, "learning_rate": 0.00010131858550200983, "loss": 7.0071, "step": 318 },
    { "epoch": 0.62, "learning_rate": 0.00010042630725132104, "loss": 7.2225, "step": 319 },
    { "epoch": 0.62, "learning_rate": 9.953599393699819e-05, "loss": 7.0026, "step": 320 },
    { "epoch": 0.62, "learning_rate": 9.8647680848101e-05, "loss": 6.9343, "step": 321 },
    { "epoch": 0.62, "learning_rate": 9.776140319440695e-05, "loss": 6.8403, "step": 322 },
    { "epoch": 0.63, "learning_rate": 9.687719610501572e-05, "loss": 6.8452, "step": 323 },
    { "epoch": 0.63, "learning_rate": 9.599509462695665e-05, "loss": 6.7219, "step": 324 },
    { "epoch": 0.63, "learning_rate": 9.511513372379965e-05, "loss": 6.6855, "step": 325 },
    { "epoch": 0.63, "learning_rate": 9.423734827426941e-05, "loss": 6.6364, "step": 326 },
    { "epoch": 0.63, "learning_rate": 9.336177307086277e-05, "loss": 6.4916, "step": 327 },
    { "epoch": 0.64, "learning_rate": 9.24884428184699e-05, "loss": 6.5035, "step": 328 },
    { "epoch": 0.64, "learning_rate": 9.161739213299841e-05, "loss": 6.4218, "step": 329 },
    { "epoch": 0.64, "learning_rate": 9.074865554000161e-05, "loss": 6.3434, "step": 330 },
    { "epoch": 0.64, "learning_rate": 8.988226747330973e-05, "loss": 6.2743, "step": 331 },
    { "epoch": 0.64, "learning_rate": 8.90182622736654e-05, "loss": 6.4393, "step": 332 },
    { "epoch": 0.65, "learning_rate": 8.815667418736217e-05, "loss": 6.3851, "step": 333 },
    { "epoch": 0.65, "learning_rate": 8.729753736488734e-05, "loss": 6.3961, "step": 334 },
    { "epoch": 0.65, "learning_rate": 8.644088585956816e-05, "loss": 6.4392, "step": 335 },
    { "epoch": 0.65, "learning_rate": 8.558675362622229e-05, "loss": 6.3019, "step": 336 },
    { "epoch": 0.65, "learning_rate": 8.473517451981186e-05, "loss": 6.4448, "step": 337 },
    { "epoch": 0.66, "learning_rate": 8.38861822941013e-05, "loss": 6.3658, "step": 338 },
    { "epoch": 0.66, "learning_rate": 8.303981060031993e-05, "loss": 6.3605, "step": 339 },
    { "epoch": 0.66, "learning_rate": 8.219609298582788e-05, "loss": 6.374, "step": 340 },
    { "epoch": 0.66, "learning_rate": 8.135506289278618e-05, "loss": 6.4577, "step": 341 },
    { "epoch": 0.66, "learning_rate": 8.051675365683163e-05, "loss": 6.3441, "step": 342 },
    { "epoch": 0.66, "learning_rate": 7.968119850575538e-05, "loss": 6.2837, "step": 343 },
    { "epoch": 0.67, "learning_rate": 7.884843055818558e-05, "loss": 6.4202, "step": 344 },
    { "epoch": 0.67, "learning_rate": 7.801848282227504e-05, "loss": 6.3048, "step": 345 },
    { "epoch": 0.67, "learning_rate": 7.719138819439281e-05, "loss": 6.2596, "step": 346 },
    { "epoch": 0.67, "learning_rate": 7.636717945782003e-05, "loss": 6.396, "step": 347 },
    { "epoch": 0.67, "learning_rate": 7.554588928145088e-05, "loss": 6.4539, "step": 348 },
    { "epoch": 0.68, "learning_rate": 7.47275502184975e-05, "loss": 6.391, "step": 349 },
    { "epoch": 0.68, "learning_rate": 7.391219470519957e-05, "loss": 6.3495, "step": 350 },
    { "epoch": 0.68, "learning_rate": 7.309985505953892e-05, "loss": 6.5338, "step": 351 },
    { "epoch": 0.68, "learning_rate": 7.229056347995841e-05, "loss": 6.5981, "step": 352 },
    { "epoch": 0.68, "learning_rate": 7.148435204408557e-05, "loss": 6.4835, "step": 353 },
    { "epoch": 0.69, "learning_rate": 7.068125270746138e-05, "loss": 6.5864, "step": 354 },
    { "epoch": 0.69, "learning_rate": 6.98812973022736e-05, "loss": 6.6236, "step": 355 },
    { "epoch": 0.69, "learning_rate": 6.908451753609481e-05, "loss": 6.6355, "step": 356 },
    { "epoch": 0.69, "learning_rate": 6.829094499062603e-05, "loss": 6.7108, "step": 357 },
    { "epoch": 0.69, "learning_rate": 6.750061112044455e-05, "loss": 6.6534, "step": 358 },
    { "epoch": 0.7, "learning_rate": 6.671354725175742e-05, "loss": 6.7027, "step": 359 },
    { "epoch": 0.7, "learning_rate": 6.592978458115978e-05, "loss": 6.7136, "step": 360 },
    { "epoch": 0.7, "learning_rate": 6.514935417439802e-05, "loss": 6.6459, "step": 361 },
    { "epoch": 0.7, "learning_rate": 6.43722869651389e-05, "loss": 6.7461, "step": 362 },
    { "epoch": 0.7, "learning_rate": 6.359861375374315e-05, "loss": 6.697, "step": 363 },
    { "epoch": 0.71, "learning_rate": 6.28283652060446e-05, "loss": 6.8183, "step": 364 },
    { "epoch": 0.71, "learning_rate": 6.206157185213493e-05, "loss": 6.8525, "step": 365 },
    { "epoch": 0.71, "learning_rate": 6.12982640851534e-05, "loss": 6.9362, "step": 366 },
    { "epoch": 0.71, "learning_rate": 6.0538472160082105e-05, "loss": 6.7913, "step": 367 },
    { "epoch": 0.71, "learning_rate": 5.978222619254696e-05, "loss": 6.6714, "step": 368 },
    { "epoch": 0.72, "learning_rate": 5.902955615762392e-05, "loss": 6.8751, "step": 369 },
    { "epoch": 0.72, "learning_rate": 5.828049188865071e-05, "loss": 6.718, "step": 370 },
    { "epoch": 0.72, "learning_rate": 5.753506307604464e-05, "loss": 6.8452, "step": 371 },
    { "epoch": 0.72, "learning_rate": 5.67932992661256e-05, "loss": 6.8406, "step": 372 },
    { "epoch": 0.72, "learning_rate": 5.605522985994481e-05, "loss": 6.8587, "step": 373 },
    { "epoch": 0.72, "learning_rate": 5.5320884112119776e-05, "loss": 6.9518, "step": 374 },
    { "epoch": 0.73, "learning_rate": 5.4590291129674564e-05, "loss": 7.0429, "step": 375 },
    { "epoch": 0.73, "learning_rate": 5.386347987088592e-05, "loss": 6.7883, "step": 376 },
    { "epoch": 0.73, "learning_rate": 5.314047914413587e-05, "loss": 6.7102, "step": 377 },
    { "epoch": 0.73, "learning_rate": 5.2421317606769455e-05, "loss": 6.678, "step": 378 },
    { "epoch": 0.73, "learning_rate": 5.1706023763959004e-05, "loss": 6.9334, "step": 379 },
    { "epoch": 0.74, "learning_rate": 5.099462596757441e-05, "loss": 6.817, "step": 380 },
    { "epoch": 0.74, "learning_rate": 5.0287152415059226e-05, "loss": 6.8474, "step": 381 },
    { "epoch": 0.74, "learning_rate": 4.958363114831286e-05, "loss": 6.7632, "step": 382 },
    { "epoch": 0.74, "learning_rate": 4.888409005257946e-05, "loss": 6.6268, "step": 383 },
    { "epoch": 0.74, "learning_rate": 4.8188556855342355e-05, "loss": 6.5044, "step": 384 },
    { "epoch": 0.75, "learning_rate": 4.749705912522501e-05, "loss": 6.7882, "step": 385 },
    { "epoch": 0.75, "learning_rate": 4.680962427089849e-05, "loss": 6.6334, "step": 386 },
    { "epoch": 0.75, "learning_rate": 4.6126279539995005e-05, "loss": 6.8479, "step": 387 },
    { "epoch": 0.75, "learning_rate": 4.544705201802772e-05, "loss": 6.6996, "step": 388 },
    { "epoch": 0.75, "learning_rate": 4.477196862731747e-05, "loss": 6.6673, "step": 389 },
    { "epoch": 0.76, "learning_rate": 4.41010561259255e-05, "loss": 6.6098, "step": 390 },
    { "epoch": 0.76, "learning_rate": 4.343434110659271e-05, "loss": 6.8179, "step": 391 },
    { "epoch": 0.76, "learning_rate": 4.277184999568594e-05, "loss": 6.7396, "step": 392 },
    { "epoch": 0.76, "learning_rate": 4.2113609052150335e-05, "loss": 6.5957, "step": 393 },
    { "epoch": 0.76, "learning_rate": 4.145964436646837e-05, "loss": 6.5761, "step": 394 },
    { "epoch": 0.77, "learning_rate": 4.080998185962606e-05, "loss": 6.6821, "step": 395 },
    { "epoch": 0.77, "learning_rate": 4.0164647282085296e-05, "loss": 6.6868, "step": 396 },
    { "epoch": 0.77, "learning_rate": 3.9523666212763166e-05, "loss": 6.7086, "step": 397 },
    { "epoch": 0.77, "learning_rate": 3.8887064058018244e-05, "loss": 6.6119, "step": 398 },
    { "epoch": 0.77, "learning_rate": 3.825486605064354e-05, "loss": 6.5869, "step": 399 },
    { "epoch": 0.78, "learning_rate": 3.7627097248866136e-05, "loss": 6.8201, "step": 400 },
    { "epoch": 0.78, "eval_loss": 6.829553127288818, "eval_runtime": 0.4568, "eval_samples_per_second": 146.658, "eval_steps_per_second": 10.945, "step": 400 },
    { "epoch": 0.78, "learning_rate": 3.700378253535427e-05, "loss": 6.584, "step": 401 },
    { "epoch": 0.78, "learning_rate": 3.6384946616230933e-05, "loss": 6.6932, "step": 402 },
    { "epoch": 0.78, "learning_rate": 3.577061402009446e-05, "loss": 6.6921, "step": 403 },
    { "epoch": 0.78, "learning_rate": 3.5160809097046586e-05, "loss": 6.6652, "step": 404 },
    { "epoch": 0.79, "learning_rate": 3.4555556017727096e-05, "loss": 6.5146, "step": 405 },
    { "epoch": 0.79, "learning_rate": 3.395487877235575e-05, "loss": 6.5751, "step": 406 },
    { "epoch": 0.79, "learning_rate": 3.335880116978154e-05, "loss": 6.4599, "step": 407 },
    { "epoch": 0.79, "learning_rate": 3.276734683653894e-05, "loss": 6.5829, "step": 408 },
    { "epoch": 0.79, "learning_rate": 3.2180539215911254e-05, "loss": 6.7763, "step": 409 },
    { "epoch": 0.79, "learning_rate": 3.15984015670017e-05, "loss": 6.6207, "step": 410 },
    { "epoch": 0.8, "learning_rate": 3.1020956963811285e-05, "loss": 6.6804, "step": 411 },
    { "epoch": 0.8, "learning_rate": 3.0448228294324255e-05, "loss": 6.754, "step": 412 },
    { "epoch": 0.8, "learning_rate": 2.988023825960095e-05, "loss": 6.6705, "step": 413 },
    { "epoch": 0.8, "learning_rate": 2.9317009372878037e-05, "loss": 6.6933, "step": 414 },
    { "epoch": 0.8, "learning_rate": 2.8758563958675974e-05, "loss": 6.7342, "step": 415 },
    { "epoch": 0.81, "learning_rate": 2.8204924151914428e-05, "loss": 6.6217, "step": 416 },
    { "epoch": 0.81, "learning_rate": 2.765611189703461e-05, "loss": 6.4585, "step": 417 },
    { "epoch": 0.81, "learning_rate": 2.7112148947129736e-05, "loss": 6.5491, "step": 418 },
    { "epoch": 0.81, "learning_rate": 2.6573056863082698e-05, "loss": 6.5655, "step": 419 },
    { "epoch": 0.81, "learning_rate": 2.603885701271133e-05, "loss": 6.7607, "step": 420 },
    { "epoch": 0.82, "learning_rate": 2.550957056992174e-05, "loss": 6.6543, "step": 421 },
    { "epoch": 0.82, "learning_rate": 2.498521851386886e-05, "loss": 6.5007, "step": 422 },
    { "epoch": 0.82, "learning_rate": 2.4465821628124837e-05, "loss": 6.6124, "step": 423 },
    { "epoch": 0.82, "learning_rate": 2.3951400499855446e-05, "loss": 6.5528, "step": 424 },
    { "epoch": 0.82, "learning_rate": 2.344197551900398e-05, "loss": 6.7489, "step": 425 },
    { "epoch": 0.83, "learning_rate": 2.293756687748297e-05, "loss": 6.6461, "step": 426 },
    { "epoch": 0.83, "learning_rate": 2.2438194568374007e-05, "loss": 6.6335, "step": 427 },
    { "epoch": 0.83, "learning_rate": 2.1943878385135227e-05, "loss": 6.5654, "step": 428 },
    { "epoch": 0.83, "learning_rate": 2.1454637920816646e-05, "loss": 6.5755, "step": 429 },
    { "epoch": 0.83, "learning_rate": 2.0970492567283765e-05, "loss": 6.6072, "step": 430 },
    { "epoch": 0.84, "learning_rate": 2.0491461514448803e-05, "loss": 6.578, "step": 431 },
    { "epoch": 0.84, "learning_rate": 2.001756374951006e-05, "loss": 6.6676, "step": 432 },
    { "epoch": 0.84, "learning_rate": 1.9548818056199377e-05, "loss": 6.4739, "step": 433 },
    { "epoch": 0.84, "learning_rate": 1.908524301403764e-05, "loss": 6.6598, "step": 434 },
    { "epoch": 0.84, "learning_rate": 1.8626856997598355e-05, "loss": 6.5202, "step": 435 },
    { "epoch": 0.85, "learning_rate": 1.817367817577915e-05, "loss": 6.5515, "step": 436 },
    { "epoch": 0.85, "learning_rate": 1.7725724511081924e-05, "loss": 6.4875, "step": 437 },
    { "epoch": 0.85, "learning_rate": 1.72830137589007e-05, "loss": 6.6304, "step": 438 },
    { "epoch": 0.85, "learning_rate": 1.6845563466817745e-05, "loss": 6.6853, "step": 439 },
    { "epoch": 0.85, "learning_rate": 1.6413390973908342e-05, "loss": 6.6227, "step": 440 },
    { "epoch": 0.85, "learning_rate": 1.5986513410053247e-05, "loss": 6.8159, "step": 441 },
    { "epoch": 0.86, "learning_rate": 1.556494769525991e-05, "loss": 6.578, "step": 442 },
    { "epoch": 0.86, "learning_rate": 1.5148710538991727e-05, "loss": 6.5347, "step": 443 },
    { "epoch": 0.86, "learning_rate": 1.4737818439505656e-05, "loss": 6.8032, "step": 444 },
    { "epoch": 0.86, "learning_rate": 1.433228768319853e-05, "loss": 6.6454, "step": 445 },
    { "epoch": 0.86, "learning_rate": 1.3932134343961265e-05, "loss": 6.5615, "step": 446 },
    { "epoch": 0.87, "learning_rate": 1.3537374282541847e-05, "loss": 6.5985, "step": 447 },
    { "epoch": 0.87, "learning_rate": 1.314802314591667e-05, "loss": 6.5956, "step": 448 },
    { "epoch": 0.87, "learning_rate": 1.276409636667038e-05, "loss": 6.6768, "step": 449 },
    { "epoch": 0.87, "learning_rate": 1.2385609162384019e-05, "loss": 6.5845, "step": 450 },
    { "epoch": 0.87, "learning_rate": 1.2012576535032087e-05, "loss": 6.513, "step": 451 },
    { "epoch": 0.88, "learning_rate": 1.1645013270387738e-05, "loss": 6.6015, "step": 452 },
    { "epoch": 0.88, "learning_rate": 1.1282933937436721e-05, "loss": 6.5111, "step": 453 },
    { "epoch": 0.88, "learning_rate": 1.0926352887800033e-05, "loss": 6.5472, "step": 454 },
    { "epoch": 0.88, "learning_rate": 1.0575284255164989e-05, "loss": 6.7356, "step": 455 },
    { "epoch": 0.88, "learning_rate": 1.022974195472499e-05, "loss": 6.7244, "step": 456 },
    { "epoch": 0.89, "learning_rate": 9.889739682628034e-06, "loss": 6.7113, "step": 457 },
    { "epoch": 0.89, "learning_rate": 9.555290915433821e-06, "loss": 6.6869, "step": 458 },
    { "epoch": 0.89, "learning_rate": 9.226408909579519e-06, "loss": 6.7583, "step": 459 },
    { "epoch": 0.89, "learning_rate": 8.903106700854423e-06, "loss": 6.687, "step": 460 },
    { "epoch": 0.89, "learning_rate": 8.585397103883296e-06, "loss": 6.6874, "step": 461 },
    { "epoch": 0.9, "learning_rate": 8.2732927116182e-06, "loss": 6.5792, "step": 462 },
    { "epoch": 0.9, "learning_rate": 7.966805894839656e-06, "loss": 6.7618, "step": 463 },
    { "epoch": 0.9, "learning_rate": 7.665948801666139e-06, "loss": 6.9631, "step": 464 },
    { "epoch": 0.9, "learning_rate": 7.370733357072539e-06, "loss": 6.6466, "step": 465 },
    { "epoch": 0.9, "learning_rate": 7.081171262417606e-06, "loss": 6.5379, "step": 466 },
    { "epoch": 0.91, "learning_rate": 6.797273994980118e-06, "loss": 6.6883, "step": 467 },
    { "epoch": 0.91, "learning_rate": 6.5190528075038436e-06, "loss": 6.6152, "step": 468 },
    { "epoch": 0.91, "learning_rate": 6.246518727751704e-06, "loss": 6.5607, "step": 469 },
    { "epoch": 0.91, "learning_rate": 5.979682558068566e-06, "loss": 6.9042, "step": 470 },
    { "epoch": 0.91, "learning_rate": 5.718554874952991e-06, "loss": 6.5923, "step": 471 },
    { "epoch": 0.91, "learning_rate": 5.463146028638249e-06, "loss": 6.5547, "step": 472 },
    { "epoch": 0.92, "learning_rate": 5.213466142681832e-06, "loss": 6.5457, "step": 473 },
    { "epoch": 0.92, "learning_rate": 4.969525113564327e-06, "loss": 6.8205, "step": 474 },
    { "epoch": 0.92, "learning_rate": 4.7313326102971225e-06, "loss": 6.5781, "step": 475 },
    { "epoch": 0.92, "learning_rate": 4.498898074039126e-06, "loss": 6.6173, "step": 476 },
    { "epoch": 0.92, "learning_rate": 4.272230717722602e-06, "loss": 6.6039, "step": 477 },
    { "epoch": 0.93, "learning_rate": 4.051339525687991e-06, "loss": 6.7091, "step": 478 },
    { "epoch": 0.93, "learning_rate": 3.83623325332772e-06, "loss": 6.7178, "step": 479 },
    { "epoch": 0.93, "learning_rate": 3.6269204267392825e-06, "loss": 6.6408, "step": 480 },
    { "epoch": 0.93, "learning_rate": 3.4234093423872786e-06, "loss": 6.548, "step": 481 },
    { "epoch": 0.93, "learning_rate": 3.2257080667744407e-06, "loss": 6.8339, "step": 482 },
    { "epoch": 0.94, "learning_rate": 3.0338244361220564e-06, "loss": 6.6542, "step": 483 },
    { "epoch": 0.94, "learning_rate": 2.8477660560593196e-06, "loss": 6.4732, "step": 484 },
    { "epoch": 0.94, "learning_rate": 2.6675403013218355e-06, "loss": 6.7991, "step": 485 },
    { "epoch": 0.94, "learning_rate": 2.4931543154593223e-06, "loss": 6.7538, "step": 486 },
    { "epoch": 0.94, "learning_rate": 2.3246150105525054e-06, "loss": 6.5082, "step": 487 },
    { "epoch": 0.95, "learning_rate": 2.161929066939083e-06, "loss": 6.8893, "step": 488 },
    { "epoch": 0.95, "learning_rate": 2.005102932948993e-06, "loss": 6.639, "step": 489 },
    { "epoch": 0.95, "learning_rate": 1.8541428246487966e-06, "loss": 6.6773, "step": 490 },
    { "epoch": 0.95, "learning_rate": 1.7090547255952935e-06, "loss": 6.6149, "step": 491 },
    { "epoch": 0.95, "learning_rate": 1.5698443865983789e-06, "loss": 6.6495, "step": 492 },
    { "epoch": 0.96, "learning_rate": 1.4365173254930585e-06, "loss": 6.7779, "step": 493 },
    { "epoch": 0.96, "learning_rate": 1.309078826920773e-06, "loss": 6.7067, "step": 494 },
    { "epoch": 0.96, "learning_rate": 1.1875339421199004e-06, "loss": 6.6096, "step": 495 },
    { "epoch": 0.96, "learning_rate": 1.0718874887256146e-06, "loss": 6.4771, "step": 496 },
    { "epoch": 0.96, "learning_rate": 9.621440505788225e-07, "loss": 6.7159, "step": 497 },
    { "epoch": 0.97, "learning_rate": 8.583079775445423e-07, "loss": 6.5673, "step": 498 },
    { "epoch": 0.97, "learning_rate": 7.603833853395247e-07, "loss": 6.6136, "step": 499 },
    { "epoch": 0.97, "learning_rate": 6.68374155369017e-07, "loss": 6.6013, "step": 500 },
    { "epoch": 0.97, "eval_loss": 6.8933210372924805, "eval_runtime": 0.4586, "eval_samples_per_second": 146.102, "eval_steps_per_second": 10.903, "step": 500 }
  ],
  "logging_steps": 1,
  "max_steps": 515,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 4108329021603840.0,
  "trial_name": null,
  "trial_params": null
}