{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.08445945945945946,
  "eval_steps": 25,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008445945945945946,
      "grad_norm": 7.727255344390869,
      "learning_rate": 2e-05,
      "loss": 8.372,
      "step": 1
    },
    {
      "epoch": 0.0008445945945945946,
      "eval_loss": 8.063016891479492,
      "eval_runtime": 125.6054,
      "eval_samples_per_second": 3.973,
      "eval_steps_per_second": 1.99,
      "step": 1
    },
    {
      "epoch": 0.0016891891891891893,
      "grad_norm": 9.669200897216797,
      "learning_rate": 4e-05,
      "loss": 8.046,
      "step": 2
    },
    {
      "epoch": 0.002533783783783784,
      "grad_norm": 9.18079948425293,
      "learning_rate": 6e-05,
      "loss": 8.0321,
      "step": 3
    },
    {
      "epoch": 0.0033783783783783786,
      "grad_norm": 9.363329887390137,
      "learning_rate": 8e-05,
      "loss": 7.0182,
      "step": 4
    },
    {
      "epoch": 0.004222972972972973,
      "grad_norm": 9.816231727600098,
      "learning_rate": 0.0001,
      "loss": 7.7225,
      "step": 5
    },
    {
      "epoch": 0.005067567567567568,
      "grad_norm": 9.099088668823242,
      "learning_rate": 0.00012,
      "loss": 7.875,
      "step": 6
    },
    {
      "epoch": 0.0059121621621621625,
      "grad_norm": 9.296707153320312,
      "learning_rate": 0.00014,
      "loss": 5.6422,
      "step": 7
    },
    {
      "epoch": 0.006756756756756757,
      "grad_norm": 9.570911407470703,
      "learning_rate": 0.00016,
      "loss": 3.8105,
      "step": 8
    },
    {
      "epoch": 0.007601351351351352,
      "grad_norm": 8.131881713867188,
      "learning_rate": 0.00018,
      "loss": 2.0885,
      "step": 9
    },
    {
      "epoch": 0.008445945945945946,
      "grad_norm": 5.208433628082275,
      "learning_rate": 0.0002,
      "loss": 1.0562,
      "step": 10
    },
    {
      "epoch": 0.009290540540540541,
      "grad_norm": 10.91145133972168,
      "learning_rate": 0.0001999390827019096,
      "loss": 2.9782,
      "step": 11
    },
    {
      "epoch": 0.010135135135135136,
      "grad_norm": 17.846271514892578,
      "learning_rate": 0.00019975640502598244,
      "loss": 3.6074,
      "step": 12
    },
    {
      "epoch": 0.01097972972972973,
      "grad_norm": 8.735333442687988,
      "learning_rate": 0.00019945218953682734,
      "loss": 0.805,
      "step": 13
    },
    {
      "epoch": 0.011824324324324325,
      "grad_norm": 6.351312637329102,
      "learning_rate": 0.00019902680687415705,
      "loss": 1.1463,
      "step": 14
    },
    {
      "epoch": 0.01266891891891892,
      "grad_norm": 2.7280004024505615,
      "learning_rate": 0.00019848077530122083,
      "loss": 0.2077,
      "step": 15
    },
    {
      "epoch": 0.013513513513513514,
      "grad_norm": 5.975780487060547,
      "learning_rate": 0.00019781476007338058,
      "loss": 1.1476,
      "step": 16
    },
    {
      "epoch": 0.014358108108108109,
      "grad_norm": 5.841843605041504,
      "learning_rate": 0.00019702957262759965,
      "loss": 1.0156,
      "step": 17
    },
    {
      "epoch": 0.015202702702702704,
      "grad_norm": 6.997043132781982,
      "learning_rate": 0.0001961261695938319,
      "loss": 0.6625,
      "step": 18
    },
    {
      "epoch": 0.016047297297297296,
      "grad_norm": 7.254495620727539,
      "learning_rate": 0.00019510565162951537,
      "loss": 0.8055,
      "step": 19
    },
    {
      "epoch": 0.016891891891891893,
      "grad_norm": 5.706070423126221,
      "learning_rate": 0.00019396926207859084,
      "loss": 0.3771,
      "step": 20
    },
    {
      "epoch": 0.017736486486486486,
      "grad_norm": 3.621236801147461,
      "learning_rate": 0.00019271838545667876,
      "loss": 0.2341,
      "step": 21
    },
    {
      "epoch": 0.018581081081081082,
      "grad_norm": 1.3116642236709595,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.0388,
      "step": 22
    },
    {
      "epoch": 0.019425675675675675,
      "grad_norm": 6.483520030975342,
      "learning_rate": 0.0001898794046299167,
      "loss": 0.7181,
      "step": 23
    },
    {
      "epoch": 0.02027027027027027,
      "grad_norm": 16.79340171813965,
      "learning_rate": 0.00018829475928589271,
      "loss": 1.0507,
      "step": 24
    },
    {
      "epoch": 0.021114864864864864,
      "grad_norm": 3.9302544593811035,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.8626,
      "step": 25
    },
    {
      "epoch": 0.021114864864864864,
      "eval_loss": 0.4980570375919342,
      "eval_runtime": 126.9443,
      "eval_samples_per_second": 3.931,
      "eval_steps_per_second": 1.969,
      "step": 25
    },
    {
      "epoch": 0.02195945945945946,
      "grad_norm": 3.3856303691864014,
      "learning_rate": 0.0001848048096156426,
      "loss": 0.3799,
      "step": 26
    },
    {
      "epoch": 0.022804054054054054,
      "grad_norm": 4.26168966293335,
      "learning_rate": 0.00018290375725550417,
      "loss": 0.5836,
      "step": 27
    },
    {
      "epoch": 0.02364864864864865,
      "grad_norm": 4.655025959014893,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.6228,
      "step": 28
    },
    {
      "epoch": 0.024493243243243243,
      "grad_norm": 5.267085075378418,
      "learning_rate": 0.00017880107536067218,
      "loss": 0.354,
      "step": 29
    },
    {
      "epoch": 0.02533783783783784,
      "grad_norm": 3.4175171852111816,
      "learning_rate": 0.0001766044443118978,
      "loss": 0.3025,
      "step": 30
    },
    {
      "epoch": 0.026182432432432432,
      "grad_norm": 3.4233884811401367,
      "learning_rate": 0.00017431448254773944,
      "loss": 0.3893,
      "step": 31
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 0.795799195766449,
      "learning_rate": 0.0001719339800338651,
      "loss": 0.0842,
      "step": 32
    },
    {
      "epoch": 0.02787162162162162,
      "grad_norm": 2.771843671798706,
      "learning_rate": 0.00016946583704589973,
      "loss": 0.5852,
      "step": 33
    },
    {
      "epoch": 0.028716216216216218,
      "grad_norm": 2.5016746520996094,
      "learning_rate": 0.00016691306063588583,
      "loss": 0.467,
      "step": 34
    },
    {
      "epoch": 0.02956081081081081,
      "grad_norm": 3.512495994567871,
      "learning_rate": 0.00016427876096865394,
      "loss": 0.1078,
      "step": 35
    },
    {
      "epoch": 0.030405405405405407,
      "grad_norm": 2.59957218170166,
      "learning_rate": 0.0001615661475325658,
      "loss": 0.2617,
      "step": 36
    },
    {
      "epoch": 0.03125,
      "grad_norm": 1.0484542846679688,
      "learning_rate": 0.00015877852522924732,
      "loss": 0.0766,
      "step": 37
    },
    {
      "epoch": 0.03209459459459459,
      "grad_norm": 12.746214866638184,
      "learning_rate": 0.0001559192903470747,
      "loss": 0.3286,
      "step": 38
    },
    {
      "epoch": 0.032939189189189186,
      "grad_norm": 0.457368403673172,
      "learning_rate": 0.0001529919264233205,
      "loss": 0.0239,
      "step": 39
    },
    {
      "epoch": 0.033783783783783786,
      "grad_norm": 4.603786945343018,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.2592,
      "step": 40
    },
    {
      "epoch": 0.03462837837837838,
      "grad_norm": 4.357280731201172,
      "learning_rate": 0.00014694715627858908,
      "loss": 0.3277,
      "step": 41
    },
    {
      "epoch": 0.03547297297297297,
      "grad_norm": 5.606065273284912,
      "learning_rate": 0.00014383711467890774,
      "loss": 0.9881,
      "step": 42
    },
    {
      "epoch": 0.036317567567567564,
      "grad_norm": 0.958200216293335,
      "learning_rate": 0.00014067366430758004,
      "loss": 0.0339,
      "step": 43
    },
    {
      "epoch": 0.037162162162162164,
      "grad_norm": 0.9110239744186401,
      "learning_rate": 0.00013746065934159123,
      "loss": 0.0388,
      "step": 44
    },
    {
      "epoch": 0.03800675675675676,
      "grad_norm": 10.007410049438477,
      "learning_rate": 0.00013420201433256689,
      "loss": 0.1342,
      "step": 45
    },
    {
      "epoch": 0.03885135135135135,
      "grad_norm": 3.0555267333984375,
      "learning_rate": 0.00013090169943749476,
      "loss": 0.1913,
      "step": 46
    },
    {
      "epoch": 0.03969594594594594,
      "grad_norm": 4.296360969543457,
      "learning_rate": 0.0001275637355816999,
      "loss": 0.2431,
      "step": 47
    },
    {
      "epoch": 0.04054054054054054,
      "grad_norm": 0.9275014400482178,
      "learning_rate": 0.00012419218955996676,
      "loss": 0.0389,
      "step": 48
    },
    {
      "epoch": 0.041385135135135136,
      "grad_norm": 0.2515358626842499,
      "learning_rate": 0.00012079116908177593,
      "loss": 0.0064,
      "step": 49
    },
    {
      "epoch": 0.04222972972972973,
      "grad_norm": 0.12618118524551392,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.0036,
      "step": 50
    },
    {
      "epoch": 0.04222972972972973,
      "eval_loss": 0.24848654866218567,
      "eval_runtime": 127.0152,
      "eval_samples_per_second": 3.929,
      "eval_steps_per_second": 1.968,
      "step": 50
    },
    {
      "epoch": 0.04307432432432432,
      "grad_norm": 5.329013347625732,
      "learning_rate": 0.00011391731009600654,
      "loss": 0.2995,
      "step": 51
    },
    {
      "epoch": 0.04391891891891892,
      "grad_norm": 2.9597175121307373,
      "learning_rate": 0.00011045284632676536,
      "loss": 0.1657,
      "step": 52
    },
    {
      "epoch": 0.044763513513513514,
      "grad_norm": 4.471776485443115,
      "learning_rate": 0.00010697564737441252,
      "loss": 0.2049,
      "step": 53
    },
    {
      "epoch": 0.04560810810810811,
      "grad_norm": 1.612620234489441,
      "learning_rate": 0.00010348994967025012,
      "loss": 0.0311,
      "step": 54
    },
    {
      "epoch": 0.0464527027027027,
      "grad_norm": 8.52501392364502,
      "learning_rate": 0.0001,
      "loss": 0.4772,
      "step": 55
    },
    {
      "epoch": 0.0472972972972973,
      "grad_norm": 11.812604904174805,
      "learning_rate": 9.651005032974994e-05,
      "loss": 0.6968,
      "step": 56
    },
    {
      "epoch": 0.04814189189189189,
      "grad_norm": 3.004138708114624,
      "learning_rate": 9.302435262558747e-05,
      "loss": 0.66,
      "step": 57
    },
    {
      "epoch": 0.048986486486486486,
      "grad_norm": 7.490670204162598,
      "learning_rate": 8.954715367323468e-05,
      "loss": 0.5833,
      "step": 58
    },
    {
      "epoch": 0.04983108108108108,
      "grad_norm": 3.6796233654022217,
      "learning_rate": 8.608268990399349e-05,
      "loss": 0.5957,
      "step": 59
    },
    {
      "epoch": 0.05067567567567568,
      "grad_norm": 0.1634790003299713,
      "learning_rate": 8.263518223330697e-05,
      "loss": 0.0041,
      "step": 60
    },
    {
      "epoch": 0.05152027027027027,
      "grad_norm": 6.999042510986328,
      "learning_rate": 7.920883091822408e-05,
      "loss": 0.4254,
      "step": 61
    },
    {
      "epoch": 0.052364864864864864,
      "grad_norm": 1.4367730617523193,
      "learning_rate": 7.580781044003324e-05,
      "loss": 0.0328,
      "step": 62
    },
    {
      "epoch": 0.05320945945945946,
      "grad_norm": 4.265451908111572,
      "learning_rate": 7.243626441830009e-05,
      "loss": 0.1511,
      "step": 63
    },
    {
      "epoch": 0.05405405405405406,
      "grad_norm": 2.716522693634033,
      "learning_rate": 6.909830056250527e-05,
      "loss": 0.2104,
      "step": 64
    },
    {
      "epoch": 0.05489864864864865,
      "grad_norm": 1.9555131196975708,
      "learning_rate": 6.579798566743314e-05,
      "loss": 0.1398,
      "step": 65
    },
    {
      "epoch": 0.05574324324324324,
      "grad_norm": 2.594125509262085,
      "learning_rate": 6.25393406584088e-05,
      "loss": 0.2959,
      "step": 66
    },
    {
      "epoch": 0.056587837837837836,
      "grad_norm": 4.196368217468262,
      "learning_rate": 5.9326335692419995e-05,
      "loss": 0.2828,
      "step": 67
    },
    {
      "epoch": 0.057432432432432436,
      "grad_norm": 7.690390110015869,
      "learning_rate": 5.616288532109225e-05,
      "loss": 0.6751,
      "step": 68
    },
    {
      "epoch": 0.05827702702702703,
      "grad_norm": 5.08601713180542,
      "learning_rate": 5.305284372141095e-05,
      "loss": 0.7061,
      "step": 69
    },
    {
      "epoch": 0.05912162162162162,
      "grad_norm": 0.743725597858429,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.0161,
      "step": 70
    },
    {
      "epoch": 0.059966216216216214,
      "grad_norm": 1.1107006072998047,
      "learning_rate": 4.700807357667952e-05,
      "loss": 0.0289,
      "step": 71
    },
    {
      "epoch": 0.060810810810810814,
      "grad_norm": 3.6318747997283936,
      "learning_rate": 4.4080709652925336e-05,
      "loss": 0.2351,
      "step": 72
    },
    {
      "epoch": 0.06165540540540541,
      "grad_norm": 3.365870714187622,
      "learning_rate": 4.12214747707527e-05,
      "loss": 0.2887,
      "step": 73
    },
    {
      "epoch": 0.0625,
      "grad_norm": 2.29145884513855,
      "learning_rate": 3.843385246743417e-05,
      "loss": 0.2137,
      "step": 74
    },
    {
      "epoch": 0.0633445945945946,
      "grad_norm": 0.12063544243574142,
      "learning_rate": 3.5721239031346066e-05,
      "loss": 0.0043,
      "step": 75
    },
    {
      "epoch": 0.0633445945945946,
      "eval_loss": 0.14590471982955933,
      "eval_runtime": 126.9422,
      "eval_samples_per_second": 3.931,
      "eval_steps_per_second": 1.969,
      "step": 75
    },
    {
      "epoch": 0.06418918918918919,
      "grad_norm": 0.20781630277633667,
      "learning_rate": 3.308693936411421e-05,
      "loss": 0.0065,
      "step": 76
    },
    {
      "epoch": 0.06503378378378379,
      "grad_norm": 0.7386759519577026,
      "learning_rate": 3.053416295410026e-05,
      "loss": 0.0431,
      "step": 77
    },
    {
      "epoch": 0.06587837837837837,
      "grad_norm": 3.0190958976745605,
      "learning_rate": 2.8066019966134904e-05,
      "loss": 0.2471,
      "step": 78
    },
    {
      "epoch": 0.06672297297297297,
      "grad_norm": 4.477311134338379,
      "learning_rate": 2.5685517452260567e-05,
      "loss": 0.8406,
      "step": 79
    },
    {
      "epoch": 0.06756756756756757,
      "grad_norm": 0.9087244272232056,
      "learning_rate": 2.339555568810221e-05,
      "loss": 0.0187,
      "step": 80
    },
    {
      "epoch": 0.06841216216216216,
      "grad_norm": 2.032402276992798,
      "learning_rate": 2.119892463932781e-05,
      "loss": 0.3885,
      "step": 81
    },
    {
      "epoch": 0.06925675675675676,
      "grad_norm": 5.099620819091797,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 0.1469,
      "step": 82
    },
    {
      "epoch": 0.07010135135135136,
      "grad_norm": 1.4657068252563477,
      "learning_rate": 1.7096242744495837e-05,
      "loss": 0.0909,
      "step": 83
    },
    {
      "epoch": 0.07094594594594594,
      "grad_norm": 0.7036009430885315,
      "learning_rate": 1.5195190384357404e-05,
      "loss": 0.0208,
      "step": 84
    },
    {
      "epoch": 0.07179054054054054,
      "grad_norm": 7.542021751403809,
      "learning_rate": 1.339745962155613e-05,
      "loss": 1.0755,
      "step": 85
    },
    {
      "epoch": 0.07263513513513513,
      "grad_norm": 0.7535510063171387,
      "learning_rate": 1.1705240714107302e-05,
      "loss": 0.0495,
      "step": 86
    },
    {
      "epoch": 0.07347972972972973,
      "grad_norm": 2.493562936782837,
      "learning_rate": 1.0120595370083318e-05,
      "loss": 0.2186,
      "step": 87
    },
    {
      "epoch": 0.07432432432432433,
      "grad_norm": 3.849121332168579,
      "learning_rate": 8.645454235739903e-06,
      "loss": 0.2773,
      "step": 88
    },
    {
      "epoch": 0.07516891891891891,
      "grad_norm": 4.295147895812988,
      "learning_rate": 7.281614543321269e-06,
      "loss": 0.448,
      "step": 89
    },
    {
      "epoch": 0.07601351351351351,
      "grad_norm": 3.61863112449646,
      "learning_rate": 6.030737921409169e-06,
      "loss": 0.329,
      "step": 90
    },
    {
      "epoch": 0.07685810810810811,
      "grad_norm": 5.776922702789307,
      "learning_rate": 4.8943483704846475e-06,
      "loss": 0.4583,
      "step": 91
    },
    {
      "epoch": 0.0777027027027027,
      "grad_norm": 2.3311469554901123,
      "learning_rate": 3.873830406168111e-06,
      "loss": 0.2086,
      "step": 92
    },
    {
      "epoch": 0.0785472972972973,
      "grad_norm": 0.32096993923187256,
      "learning_rate": 2.970427372400353e-06,
      "loss": 0.0184,
      "step": 93
    },
    {
      "epoch": 0.07939189189189189,
      "grad_norm": 0.9880800843238831,
      "learning_rate": 2.1852399266194314e-06,
      "loss": 0.0559,
      "step": 94
    },
    {
      "epoch": 0.08023648648648649,
      "grad_norm": 5.171363830566406,
      "learning_rate": 1.5192246987791981e-06,
      "loss": 0.4847,
      "step": 95
    },
    {
      "epoch": 0.08108108108108109,
      "grad_norm": 3.456587314605713,
      "learning_rate": 9.731931258429638e-07,
      "loss": 0.2026,
      "step": 96
    },
    {
      "epoch": 0.08192567567567567,
      "grad_norm": 3.7345874309539795,
      "learning_rate": 5.478104631726711e-07,
      "loss": 0.5642,
      "step": 97
    },
    {
      "epoch": 0.08277027027027027,
      "grad_norm": 4.7123541831970215,
      "learning_rate": 2.4359497401758024e-07,
      "loss": 0.4377,
      "step": 98
    },
    {
      "epoch": 0.08361486486486487,
      "grad_norm": 2.566887617111206,
      "learning_rate": 6.09172980904238e-08,
      "loss": 0.2207,
      "step": 99
    },
    {
      "epoch": 0.08445945945945946,
      "grad_norm": 1.7636704444885254,
      "learning_rate": 0.0,
      "loss": 0.0955,
      "step": 100
    },
    {
      "epoch": 0.08445945945945946,
      "eval_loss": 0.14757972955703735,
      "eval_runtime": 126.9347,
      "eval_samples_per_second": 3.931,
      "eval_steps_per_second": 1.97,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.32218823376896e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}