{ "best_metric": 1.9268525838851929, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 2.0073319755600814, "eval_steps": 25, "global_step": 154, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013034623217922607, "grad_norm": 0.6276217103004456, "learning_rate": 7.5e-05, "loss": 2.9674, "step": 1 }, { "epoch": 0.013034623217922607, "eval_loss": 3.5546200275421143, "eval_runtime": 0.2814, "eval_samples_per_second": 177.672, "eval_steps_per_second": 46.195, "step": 1 }, { "epoch": 0.026069246435845215, "grad_norm": 0.6501481533050537, "learning_rate": 0.00015, "loss": 3.2018, "step": 2 }, { "epoch": 0.039103869653767824, "grad_norm": 0.5839198231697083, "learning_rate": 0.000225, "loss": 3.3157, "step": 3 }, { "epoch": 0.05213849287169043, "grad_norm": 0.5366824865341187, "learning_rate": 0.0003, "loss": 3.3337, "step": 4 }, { "epoch": 0.06517311608961303, "grad_norm": 0.4950920641422272, "learning_rate": 0.0002999703922691041, "loss": 3.3257, "step": 5 }, { "epoch": 0.07820773930753565, "grad_norm": 0.4459711015224457, "learning_rate": 0.00029988158206334587, "loss": 3.2473, "step": 6 }, { "epoch": 0.09124236252545825, "grad_norm": 0.5029334425926208, "learning_rate": 0.00029973360833781664, "loss": 3.251, "step": 7 }, { "epoch": 0.10427698574338086, "grad_norm": 0.5529475808143616, "learning_rate": 0.0002995265359986831, "loss": 3.212, "step": 8 }, { "epoch": 0.11731160896130347, "grad_norm": 0.6158551573753357, "learning_rate": 0.00029926045587471686, "loss": 3.1893, "step": 9 }, { "epoch": 0.13034623217922606, "grad_norm": 0.6573907136917114, "learning_rate": 0.0002989354846774545, "loss": 3.1782, "step": 10 }, { "epoch": 0.14338085539714868, "grad_norm": 0.6919009685516357, "learning_rate": 0.0002985517649500034, "loss": 3.0629, "step": 11 }, { "epoch": 0.1564154786150713, "grad_norm": 0.7953146696090698, "learning_rate": 0.00029810946500451814, "loss": 3.132, "step": 12 }, { "epoch": 0.1694501018329939, "grad_norm": 0.9705007672309875, "learning_rate": 0.00029760877884837294, "loss": 2.7258, "step": 13 }, { "epoch": 0.1824847250509165, "grad_norm": 0.9804863333702087, "learning_rate": 0.0002970499260990637, "loss": 2.7512, "step": 14 }, { "epoch": 0.1955193482688391, "grad_norm": 0.6821589469909668, "learning_rate": 0.0002964331518878766, "loss": 2.6437, "step": 15 }, { "epoch": 0.20855397148676172, "grad_norm": 0.4936249554157257, "learning_rate": 0.0002957587267523652, "loss": 2.6137, "step": 16 }, { "epoch": 0.2215885947046843, "grad_norm": 0.5652141571044922, "learning_rate": 0.00029502694651768383, "loss": 2.6432, "step": 17 }, { "epoch": 0.23462321792260693, "grad_norm": 0.6153628826141357, "learning_rate": 0.000294238132166829, "loss": 2.6485, "step": 18 }, { "epoch": 0.24765784114052952, "grad_norm": 0.4337889552116394, "learning_rate": 0.0002933926296998457, "loss": 2.5919, "step": 19 }, { "epoch": 0.2606924643584521, "grad_norm": 0.5410802364349365, "learning_rate": 0.0002924908099820599, "loss": 2.6103, "step": 20 }, { "epoch": 0.27372708757637476, "grad_norm": 0.5030240416526794, "learning_rate": 0.00029153306858140533, "loss": 2.6104, "step": 21 }, { "epoch": 0.28676171079429735, "grad_norm": 0.38985803723335266, "learning_rate": 0.00029051982559491393, "loss": 2.5794, "step": 22 }, { "epoch": 0.29979633401221994, "grad_norm": 0.5979215502738953, "learning_rate": 0.00028945152546444754, "loss": 2.6287, "step": 23 }, { "epoch": 0.3128309572301426, "grad_norm": 0.5737847089767456, "learning_rate": 0.0002883286367817511, "loss": 2.6541, "step": 24 }, { "epoch": 0.3258655804480652, "grad_norm": 0.572758138179779, "learning_rate": 0.00028715165208291265, "loss": 2.6881, "step": 25 }, { "epoch": 0.3258655804480652, "eval_loss": 2.5153815746307373, "eval_runtime": 0.2797, "eval_samples_per_second": 178.771, "eval_steps_per_second": 46.481, "step": 25 }, { "epoch": 0.3389002036659878, "grad_norm": 1.4206351041793823, "learning_rate": 0.0002859210876323207, "loss": 2.4658, "step": 26 }, { "epoch": 0.35193482688391037, "grad_norm": 1.5302618741989136, "learning_rate": 0.00028463748319621396, "loss": 2.5304, "step": 27 }, { "epoch": 0.364969450101833, "grad_norm": 0.9723348021507263, "learning_rate": 0.00028330140180592156, "loss": 2.4213, "step": 28 }, { "epoch": 0.3780040733197556, "grad_norm": 0.4521373212337494, "learning_rate": 0.0002819134295108992, "loss": 2.4048, "step": 29 }, { "epoch": 0.3910386965376782, "grad_norm": 0.6521058678627014, "learning_rate": 0.00028047417512166837, "loss": 2.4019, "step": 30 }, { "epoch": 0.4040733197556008, "grad_norm": 0.7668533325195312, "learning_rate": 0.00027898426994277204, "loss": 2.4534, "step": 31 }, { "epoch": 0.41710794297352344, "grad_norm": 0.791877269744873, "learning_rate": 0.0002774443674958634, "loss": 2.4232, "step": 32 }, { "epoch": 0.43014256619144603, "grad_norm": 0.814365565776825, "learning_rate": 0.00027585514323305, "loss": 2.4323, "step": 33 }, { "epoch": 0.4431771894093686, "grad_norm": 0.6510496735572815, "learning_rate": 0.00027421729424061787, "loss": 2.3904, "step": 34 }, { "epoch": 0.45621181262729127, "grad_norm": 0.42264309525489807, "learning_rate": 0.00027253153893326646, "loss": 2.4016, "step": 35 }, { "epoch": 0.46924643584521386, "grad_norm": 0.5813013911247253, "learning_rate": 0.0002707986167389884, "loss": 2.4511, "step": 36 }, { "epoch": 0.48228105906313645, "grad_norm": 0.8300145864486694, "learning_rate": 0.0002690192877747315, "loss": 2.4128, "step": 37 }, { "epoch": 0.49531568228105904, "grad_norm": 0.7689920663833618, "learning_rate": 0.0002671943325129871, "loss": 2.3281, "step": 38 }, { "epoch": 0.5083503054989816, "grad_norm": 1.024122953414917, "learning_rate": 0.0002653245514394482, "loss": 2.3381, "step": 39 }, { "epoch": 0.5213849287169042, "grad_norm": 0.6759045720100403, "learning_rate": 0.0002634107647018905, "loss": 2.3008, "step": 40 }, { "epoch": 0.5344195519348269, "grad_norm": 0.3922083377838135, "learning_rate": 0.0002614538117504284, "loss": 2.2719, "step": 41 }, { "epoch": 0.5474541751527495, "grad_norm": 0.5142776966094971, "learning_rate": 0.0002594545509693043, "loss": 2.276, "step": 42 }, { "epoch": 0.5604887983706721, "grad_norm": 0.6373997926712036, "learning_rate": 0.00025741385930037295, "loss": 2.2602, "step": 43 }, { "epoch": 0.5735234215885947, "grad_norm": 0.6760398149490356, "learning_rate": 0.00025533263185844587, "loss": 2.3169, "step": 44 }, { "epoch": 0.5865580448065173, "grad_norm": 0.7617467045783997, "learning_rate": 0.00025321178153866423, "loss": 2.2778, "step": 45 }, { "epoch": 0.5995926680244399, "grad_norm": 0.7649106979370117, "learning_rate": 0.00025105223861607306, "loss": 2.2692, "step": 46 }, { "epoch": 0.6126272912423625, "grad_norm": 0.6090757250785828, "learning_rate": 0.0002488549503375719, "loss": 2.2845, "step": 47 }, { "epoch": 0.6256619144602852, "grad_norm": 0.45042526721954346, "learning_rate": 0.0002466208805064206, "loss": 2.2431, "step": 48 }, { "epoch": 0.6386965376782078, "grad_norm": 0.46896523237228394, "learning_rate": 0.00024435100905948387, "loss": 2.2985, "step": 49 }, { "epoch": 0.6517311608961304, "grad_norm": 0.5649595260620117, "learning_rate": 0.00024204633163739828, "loss": 2.4211, "step": 50 }, { "epoch": 0.6517311608961304, "eval_loss": 2.2425832748413086, "eval_runtime": 0.2798, "eval_samples_per_second": 178.69, "eval_steps_per_second": 46.46, "step": 50 }, { "epoch": 0.664765784114053, "grad_norm": 1.1472358703613281, "learning_rate": 0.00023970785914785144, "loss": 2.2791, "step": 51 }, { "epoch": 0.6778004073319756, "grad_norm": 1.3499984741210938, "learning_rate": 0.00023733661732216452, "loss": 2.3292, "step": 52 }, { "epoch": 0.6908350305498981, "grad_norm": 1.2178796529769897, "learning_rate": 0.00023493364626537257, "loss": 2.2938, "step": 53 }, { "epoch": 0.7038696537678207, "grad_norm": 0.9463868737220764, "learning_rate": 0.00023249999999999999, "loss": 2.2173, "step": 54 }, { "epoch": 0.7169042769857433, "grad_norm": 0.5750055909156799, "learning_rate": 0.00023003674600373153, "loss": 2.2036, "step": 55 }, { "epoch": 0.729938900203666, "grad_norm": 0.3371862769126892, "learning_rate": 0.00022754496474118133, "loss": 2.167, "step": 56 }, { "epoch": 0.7429735234215886, "grad_norm": 0.6258431673049927, "learning_rate": 0.00022502574918996517, "loss": 2.2211, "step": 57 }, { "epoch": 0.7560081466395112, "grad_norm": 0.7280526161193848, "learning_rate": 0.00022248020436128478, "loss": 2.2122, "step": 58 }, { "epoch": 0.7690427698574338, "grad_norm": 0.8275133371353149, "learning_rate": 0.00021990944681523302, "loss": 2.2137, "step": 59 }, { "epoch": 0.7820773930753564, "grad_norm": 0.8840050101280212, "learning_rate": 0.0002173146041710339, "loss": 2.2311, "step": 60 }, { "epoch": 0.795112016293279, "grad_norm": 0.957916796207428, "learning_rate": 0.00021469681461243153, "loss": 2.2478, "step": 61 }, { "epoch": 0.8081466395112016, "grad_norm": 1.0393691062927246, "learning_rate": 0.00021205722638844505, "loss": 2.2579, "step": 62 }, { "epoch": 0.8211812627291243, "grad_norm": 0.3389835059642792, "learning_rate": 0.00020939699730970873, "loss": 2.1799, "step": 63 }, { "epoch": 0.8342158859470469, "grad_norm": 0.5756048560142517, "learning_rate": 0.00020671729424061788, "loss": 2.1684, "step": 64 }, { "epoch": 0.8472505091649695, "grad_norm": 0.7830222249031067, "learning_rate": 0.00020401929258750365, "loss": 2.1438, "step": 65 }, { "epoch": 0.8602851323828921, "grad_norm": 0.8117411732673645, "learning_rate": 0.00020130417578306082, "loss": 2.1536, "step": 66 }, { "epoch": 0.8733197556008147, "grad_norm": 0.7848119735717773, "learning_rate": 0.0001985731347672554, "loss": 2.129, "step": 67 }, { "epoch": 0.8863543788187372, "grad_norm": 0.6703603863716125, "learning_rate": 0.00019582736746493853, "loss": 2.152, "step": 68 }, { "epoch": 0.8993890020366598, "grad_norm": 0.4845719635486603, "learning_rate": 0.00019306807826039747, "loss": 2.1237, "step": 69 }, { "epoch": 0.9124236252545825, "grad_norm": 0.3355918228626251, "learning_rate": 0.00019029647746907283, "loss": 2.0711, "step": 70 }, { "epoch": 0.9254582484725051, "grad_norm": 0.33707523345947266, "learning_rate": 0.00018751378080667378, "loss": 2.1033, "step": 71 }, { "epoch": 0.9384928716904277, "grad_norm": 0.5474686622619629, "learning_rate": 0.00018472120885592555, "loss": 2.0707, "step": 72 }, { "epoch": 0.9515274949083503, "grad_norm": 0.71000075340271, "learning_rate": 0.00018191998653118108, "loss": 2.1169, "step": 73 }, { "epoch": 0.9645621181262729, "grad_norm": 0.8560431003570557, "learning_rate": 0.0001791113425411332, "loss": 2.176, "step": 74 }, { "epoch": 0.9775967413441955, "grad_norm": 1.2276349067687988, "learning_rate": 0.0001762965088498626, "loss": 2.2966, "step": 75 }, { "epoch": 0.9775967413441955, "eval_loss": 2.044175148010254, "eval_runtime": 0.2806, "eval_samples_per_second": 178.197, "eval_steps_per_second": 46.331, "step": 75 }, { "epoch": 0.9906313645621181, "grad_norm": 0.28324252367019653, "learning_rate": 0.0001734767201364573, "loss": 2.0594, "step": 76 }, { "epoch": 1.0036659877800407, "grad_norm": 0.4252139627933502, "learning_rate": 0.00017065321325344194, "loss": 2.9023, "step": 77 }, { "epoch": 1.0167006109979633, "grad_norm": 0.532595157623291, "learning_rate": 0.00016782722668425316, "loss": 1.9476, "step": 78 }, { "epoch": 1.0297352342158859, "grad_norm": 0.696269690990448, "learning_rate": 0.000165, "loss": 2.089, "step": 79 }, { "epoch": 1.0427698574338085, "grad_norm": 0.6571292281150818, "learning_rate": 0.00016217277331574678, "loss": 2.0514, "step": 80 }, { "epoch": 1.055804480651731, "grad_norm": 0.5735260248184204, "learning_rate": 0.00015934678674655805, "loss": 2.0645, "step": 81 }, { "epoch": 1.0688391038696539, "grad_norm": 0.418550968170166, "learning_rate": 0.0001565232798635427, "loss": 2.0639, "step": 82 }, { "epoch": 1.0818737270875765, "grad_norm": 0.32165512442588806, "learning_rate": 0.00015370349115013742, "loss": 2.0412, "step": 83 }, { "epoch": 1.094908350305499, "grad_norm": 0.3411344289779663, "learning_rate": 0.0001508886574588668, "loss": 2.0738, "step": 84 }, { "epoch": 1.1079429735234216, "grad_norm": 0.4526989459991455, "learning_rate": 0.0001480800134688189, "loss": 2.0482, "step": 85 }, { "epoch": 1.1209775967413442, "grad_norm": 0.5264050960540771, "learning_rate": 0.00014527879114407445, "loss": 2.0155, "step": 86 }, { "epoch": 1.1340122199592668, "grad_norm": 0.6333541870117188, "learning_rate": 0.0001424862191933262, "loss": 2.029, "step": 87 }, { "epoch": 1.1470468431771894, "grad_norm": 0.6475998759269714, "learning_rate": 0.00013970352253092714, "loss": 2.0732, "step": 88 }, { "epoch": 1.160081466395112, "grad_norm": 0.5682183504104614, "learning_rate": 0.00013693192173960253, "loss": 1.6717, "step": 89 }, { "epoch": 1.1731160896130346, "grad_norm": 0.48593777418136597, "learning_rate": 0.00013417263253506147, "loss": 2.5498, "step": 90 }, { "epoch": 1.1861507128309572, "grad_norm": 0.517917811870575, "learning_rate": 0.00013142686523274463, "loss": 2.0097, "step": 91 }, { "epoch": 1.1991853360488798, "grad_norm": 0.5828862190246582, "learning_rate": 0.00012869582421693912, "loss": 1.9987, "step": 92 }, { "epoch": 1.2122199592668024, "grad_norm": 0.5273678302764893, "learning_rate": 0.00012598070741249632, "loss": 2.0205, "step": 93 }, { "epoch": 1.225254582484725, "grad_norm": 0.49020346999168396, "learning_rate": 0.00012328270575938212, "loss": 1.9981, "step": 94 }, { "epoch": 1.2382892057026478, "grad_norm": 0.35303086042404175, "learning_rate": 0.00012060300269029128, "loss": 1.9699, "step": 95 }, { "epoch": 1.2513238289205701, "grad_norm": 0.27531367540359497, "learning_rate": 0.00011794277361155495, "loss": 2.0035, "step": 96 }, { "epoch": 1.264358452138493, "grad_norm": 0.31903597712516785, "learning_rate": 0.00011530318538756846, "loss": 1.9783, "step": 97 }, { "epoch": 1.2773930753564156, "grad_norm": 0.43081673979759216, "learning_rate": 0.0001126853958289661, "loss": 2.0053, "step": 98 }, { "epoch": 1.2904276985743381, "grad_norm": 0.5114902257919312, "learning_rate": 0.00011009055318476698, "loss": 2.0032, "step": 99 }, { "epoch": 1.3034623217922607, "grad_norm": 0.6454872488975525, "learning_rate": 0.00010751979563871518, "loss": 2.0436, "step": 100 }, { "epoch": 1.3034623217922607, "eval_loss": 1.9786142110824585, "eval_runtime": 0.2787, "eval_samples_per_second": 179.386, "eval_steps_per_second": 46.64, "step": 100 }, { "epoch": 1.3164969450101833, "grad_norm": 0.7522782683372498, "learning_rate": 0.00010497425081003482, "loss": 2.1272, "step": 101 }, { "epoch": 1.329531568228106, "grad_norm": 0.7502726316452026, "learning_rate": 0.0001024550352588187, "loss": 2.2586, "step": 102 }, { "epoch": 1.3425661914460285, "grad_norm": 0.4233168661594391, "learning_rate": 9.996325399626841e-05, "loss": 1.871, "step": 103 }, { "epoch": 1.355600814663951, "grad_norm": 0.5849415063858032, "learning_rate": 9.750000000000003e-05, "loss": 1.9998, "step": 104 }, { "epoch": 1.3686354378818737, "grad_norm": 0.5961419343948364, "learning_rate": 9.506635373462745e-05, "loss": 2.0286, "step": 105 }, { "epoch": 1.3816700610997963, "grad_norm": 0.5850256085395813, "learning_rate": 9.266338267783541e-05, "loss": 1.9748, "step": 106 }, { "epoch": 1.3947046843177189, "grad_norm": 0.5355044007301331, "learning_rate": 9.029214085214857e-05, "loss": 1.9777, "step": 107 }, { "epoch": 1.4077393075356415, "grad_norm": 0.40355584025382996, "learning_rate": 8.795366836260173e-05, "loss": 1.9819, "step": 108 }, { "epoch": 1.420773930753564, "grad_norm": 0.30112820863723755, "learning_rate": 8.564899094051614e-05, "loss": 1.9478, "step": 109 }, { "epoch": 1.4338085539714869, "grad_norm": 0.286545068025589, "learning_rate": 8.33791194935794e-05, "loss": 1.9474, "step": 110 }, { "epoch": 1.4468431771894092, "grad_norm": 0.3926275372505188, "learning_rate": 8.11450496624281e-05, "loss": 1.9474, "step": 111 }, { "epoch": 1.459877800407332, "grad_norm": 0.5009521245956421, "learning_rate": 7.894776138392688e-05, "loss": 2.0219, "step": 112 }, { "epoch": 1.4729124236252547, "grad_norm": 0.6338520646095276, "learning_rate": 7.678821846133576e-05, "loss": 2.0353, "step": 113 }, { "epoch": 1.4859470468431772, "grad_norm": 0.7013182044029236, "learning_rate": 7.466736814155418e-05, "loss": 1.62, "step": 114 }, { "epoch": 1.4989816700610998, "grad_norm": 0.40693536400794983, "learning_rate": 7.258614069962701e-05, "loss": 2.5179, "step": 115 }, { "epoch": 1.5120162932790224, "grad_norm": 0.31738847494125366, "learning_rate": 7.054544903069565e-05, "loss": 1.9209, "step": 116 }, { "epoch": 1.525050916496945, "grad_norm": 0.3408113718032837, "learning_rate": 6.854618824957157e-05, "loss": 1.9408, "step": 117 }, { "epoch": 1.5380855397148676, "grad_norm": 0.30903321504592896, "learning_rate": 6.658923529810946e-05, "loss": 1.9548, "step": 118 }, { "epoch": 1.5511201629327902, "grad_norm": 0.30865398049354553, "learning_rate": 6.467544856055175e-05, "loss": 1.9355, "step": 119 }, { "epoch": 1.5641547861507128, "grad_norm": 0.2949885427951813, "learning_rate": 6.28056674870129e-05, "loss": 1.9534, "step": 120 }, { "epoch": 1.5771894093686354, "grad_norm": 0.28894004225730896, "learning_rate": 6.098071222526847e-05, "loss": 1.9511, "step": 121 }, { "epoch": 1.590224032586558, "grad_norm": 0.2710161507129669, "learning_rate": 5.9201383261011636e-05, "loss": 1.9424, "step": 122 }, { "epoch": 1.6032586558044808, "grad_norm": 0.30530881881713867, "learning_rate": 5.7468461066733505e-05, "loss": 1.9794, "step": 123 }, { "epoch": 1.6162932790224032, "grad_norm": 0.3637678027153015, "learning_rate": 5.578270575938211e-05, "loss": 1.9562, "step": 124 }, { "epoch": 1.629327902240326, "grad_norm": 0.4117651581764221, "learning_rate": 5.4144856766949957e-05, "loss": 1.9505, "step": 125 }, { "epoch": 1.629327902240326, "eval_loss": 1.9424113035202026, "eval_runtime": 0.2795, "eval_samples_per_second": 178.903, "eval_steps_per_second": 46.515, "step": 125 }, { "epoch": 1.6423625254582483, "grad_norm": 0.5891804695129395, "learning_rate": 5.255563250413657e-05, "loss": 2.0759, "step": 126 }, { "epoch": 1.6553971486761712, "grad_norm": 0.6447423696517944, "learning_rate": 5.1015730057227994e-05, "loss": 2.2958, "step": 127 }, { "epoch": 1.6684317718940935, "grad_norm": 0.3930685818195343, "learning_rate": 4.952582487833161e-05, "loss": 1.817, "step": 128 }, { "epoch": 1.6814663951120163, "grad_norm": 0.43411514163017273, "learning_rate": 4.808657048910077e-05, "loss": 1.9874, "step": 129 }, { "epoch": 1.694501018329939, "grad_norm": 0.4237060546875, "learning_rate": 4.669859819407844e-05, "loss": 1.9388, "step": 130 }, { "epoch": 1.7075356415478615, "grad_norm": 0.3899717926979065, "learning_rate": 4.536251680378601e-05, "loss": 1.9506, "step": 131 }, { "epoch": 1.7205702647657841, "grad_norm": 0.3600156009197235, "learning_rate": 4.407891236767926e-05, "loss": 1.9307, "step": 132 }, { "epoch": 1.7336048879837067, "grad_norm": 0.29930025339126587, "learning_rate": 4.2848347917087386e-05, "loss": 1.9395, "step": 133 }, { "epoch": 1.7466395112016293, "grad_norm": 0.28485575318336487, "learning_rate": 4.167136321824887e-05, "loss": 1.9331, "step": 134 }, { "epoch": 1.759674134419552, "grad_norm": 0.28593364357948303, "learning_rate": 4.054847453555244e-05, "loss": 1.9109, "step": 135 }, { "epoch": 1.7727087576374747, "grad_norm": 0.33563509583473206, "learning_rate": 3.948017440508607e-05, "loss": 1.94, "step": 136 }, { "epoch": 1.785743380855397, "grad_norm": 0.41171127557754517, "learning_rate": 3.846693141859465e-05, "loss": 1.9329, "step": 137 }, { "epoch": 1.79877800407332, "grad_norm": 0.5771033763885498, "learning_rate": 3.7509190017940066e-05, "loss": 2.0034, "step": 138 }, { "epoch": 1.8118126272912423, "grad_norm": 0.566257655620575, "learning_rate": 3.660737030015427e-05, "loss": 1.5604, "step": 139 }, { "epoch": 1.824847250509165, "grad_norm": 0.44189655780792236, "learning_rate": 3.576186783317092e-05, "loss": 2.4998, "step": 140 }, { "epoch": 1.8378818737270874, "grad_norm": 0.3839716911315918, "learning_rate": 3.4973053482316156e-05, "loss": 1.9946, "step": 141 }, { "epoch": 1.8509164969450103, "grad_norm": 0.3798718750476837, "learning_rate": 3.4241273247634805e-05, "loss": 1.9378, "step": 142 }, { "epoch": 1.8639511201629326, "grad_norm": 0.33077380061149597, "learning_rate": 3.356684811212336e-05, "loss": 1.9503, "step": 143 }, { "epoch": 1.8769857433808554, "grad_norm": 0.3309776186943054, "learning_rate": 3.2950073900936234e-05, "loss": 1.9187, "step": 144 }, { "epoch": 1.890020366598778, "grad_norm": 0.2614622116088867, "learning_rate": 3.2391221151627036e-05, "loss": 1.9367, "step": 145 }, { "epoch": 1.9030549898167006, "grad_norm": 0.27304932475090027, "learning_rate": 3.1890534995481836e-05, "loss": 1.9302, "step": 146 }, { "epoch": 1.9160896130346232, "grad_norm": 0.26790907979011536, "learning_rate": 3.144823504999658e-05, "loss": 1.9228, "step": 147 }, { "epoch": 1.9291242362525458, "grad_norm": 0.3259439468383789, "learning_rate": 3.10645153225455e-05, "loss": 1.9133, "step": 148 }, { "epoch": 1.9421588594704684, "grad_norm": 0.40042850375175476, "learning_rate": 3.0739544125283105e-05, "loss": 1.9946, "step": 149 }, { "epoch": 1.955193482688391, "grad_norm": 0.49017587304115295, "learning_rate": 3.047346400131691e-05, "loss": 1.9776, "step": 150 }, { "epoch": 1.955193482688391, "eval_loss": 1.9268525838851929, "eval_runtime": 0.2825, "eval_samples_per_second": 176.994, "eval_steps_per_second": 46.019, "step": 150 }, { "epoch": 1.9682281059063138, "grad_norm": 0.5727090835571289, "learning_rate": 3.0266391662183335e-05, "loss": 2.0008, "step": 151 }, { "epoch": 1.9812627291242362, "grad_norm": 0.6628953814506531, "learning_rate": 3.0118417936654115e-05, "loss": 2.2794, "step": 152 }, { "epoch": 1.994297352342159, "grad_norm": 0.3259391188621521, "learning_rate": 3.0029607730895854e-05, "loss": 2.0307, "step": 153 }, { "epoch": 2.0073319755600814, "grad_norm": 0.36595845222473145, "learning_rate": 2.9999999999999997e-05, "loss": 2.3735, "step": 154 } ], "logging_steps": 1, "max_steps": 154, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.841916242211635e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }