|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998682013882787, |
|
"eval_steps": 427, |
|
"global_step": 4266, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.030225814954749144, |
|
"grad_norm": 8.637994766235352, |
|
"learning_rate": 4.777802058246361e-05, |
|
"loss": 0.7443, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06045162990949829, |
|
"grad_norm": 8.726279258728027, |
|
"learning_rate": 4.77420943412748e-05, |
|
"loss": 0.3904, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.09067744486424743, |
|
"grad_norm": 18.751718521118164, |
|
"learning_rate": 4.768225729865107e-05, |
|
"loss": 0.2274, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.12090325981899658, |
|
"grad_norm": 11.341946601867676, |
|
"learning_rate": 4.759856945148865e-05, |
|
"loss": 0.2032, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1511290747737457, |
|
"grad_norm": 6.5374932289123535, |
|
"learning_rate": 4.749111471120532e-05, |
|
"loss": 0.1546, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.18135488972849487, |
|
"grad_norm": 7.3821539878845215, |
|
"learning_rate": 4.7360000819604815e-05, |
|
"loss": 0.177, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.211580704683244, |
|
"grad_norm": 4.396503448486328, |
|
"learning_rate": 4.7205359240847186e-05, |
|
"loss": 0.1611, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.24180651963799316, |
|
"grad_norm": 4.967313766479492, |
|
"learning_rate": 4.702734502963346e-05, |
|
"loss": 0.1408, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.2720323345927423, |
|
"grad_norm": 9.22049331665039, |
|
"learning_rate": 4.6826136675736696e-05, |
|
"loss": 0.1235, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3001493717599508, |
|
"eval_accuracy": 0.9654081834354615, |
|
"eval_loss": 0.12473645806312561, |
|
"eval_precision": 0.9140316205533597, |
|
"eval_runtime": 42.4722, |
|
"eval_samples_per_second": 119.113, |
|
"eval_steps_per_second": 29.784, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3022581495474914, |
|
"grad_norm": 1.0973700284957886, |
|
"learning_rate": 4.660193592503536e-05, |
|
"loss": 0.1789, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3324839645022406, |
|
"grad_norm": 12.724760055541992, |
|
"learning_rate": 4.635496757722852e-05, |
|
"loss": 0.1526, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.36270977945698973, |
|
"grad_norm": 0.692093014717102, |
|
"learning_rate": 4.60854792604356e-05, |
|
"loss": 0.0998, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.39293559441173886, |
|
"grad_norm": 8.747737884521484, |
|
"learning_rate": 4.579374118290669e-05, |
|
"loss": 0.1417, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.423161409366488, |
|
"grad_norm": 3.383861541748047, |
|
"learning_rate": 4.548004586209246e-05, |
|
"loss": 0.1416, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4533872243212371, |
|
"grad_norm": 2.690746545791626, |
|
"learning_rate": 4.514470783134528e-05, |
|
"loss": 0.1242, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4836130392759863, |
|
"grad_norm": 4.980698585510254, |
|
"learning_rate": 4.4788063324545544e-05, |
|
"loss": 0.0938, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5138388542307354, |
|
"grad_norm": 3.457531452178955, |
|
"learning_rate": 4.441046993896959e-05, |
|
"loss": 0.113, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5440646691854846, |
|
"grad_norm": 0.17746348679065704, |
|
"learning_rate": 4.40123062767371e-05, |
|
"loss": 0.086, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5742904841402338, |
|
"grad_norm": 1.7960216999053955, |
|
"learning_rate": 4.359397156519751e-05, |
|
"loss": 0.085, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6002987435199016, |
|
"eval_accuracy": 0.9703498715161099, |
|
"eval_loss": 0.190622478723526, |
|
"eval_precision": 0.918525703200776, |
|
"eval_runtime": 51.3537, |
|
"eval_samples_per_second": 98.513, |
|
"eval_steps_per_second": 24.633, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6045162990949828, |
|
"grad_norm": 0.05534067377448082, |
|
"learning_rate": 4.315588525663622e-05, |
|
"loss": 0.1477, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.634742114049732, |
|
"grad_norm": 5.788769245147705, |
|
"learning_rate": 4.269848660770166e-05, |
|
"loss": 0.1854, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6649679290044812, |
|
"grad_norm": 4.581574440002441, |
|
"learning_rate": 4.222223423897522e-05, |
|
"loss": 0.0987, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.6951937439592303, |
|
"grad_norm": 0.24250301718711853, |
|
"learning_rate": 4.1727605675125466e-05, |
|
"loss": 0.1237, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7254195589139795, |
|
"grad_norm": 6.998917579650879, |
|
"learning_rate": 4.1215096866107764e-05, |
|
"loss": 0.1304, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.7556453738687285, |
|
"grad_norm": 9.230533599853516, |
|
"learning_rate": 4.068522168988941e-05, |
|
"loss": 0.1212, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7858711888234777, |
|
"grad_norm": 0.515550971031189, |
|
"learning_rate": 4.013851143719886e-05, |
|
"loss": 0.0942, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8160970037782269, |
|
"grad_norm": 14.719630241394043, |
|
"learning_rate": 3.9575514278815625e-05, |
|
"loss": 0.1182, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.846322818732976, |
|
"grad_norm": 1.9909157752990723, |
|
"learning_rate": 3.899679471593512e-05, |
|
"loss": 0.099, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.8765486336877252, |
|
"grad_norm": 1.0096828937530518, |
|
"learning_rate": 3.840293301415938e-05, |
|
"loss": 0.1158, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9004481152798524, |
|
"eval_accuracy": 0.9756868946432101, |
|
"eval_loss": 0.10856811702251434, |
|
"eval_precision": 0.941468253968254, |
|
"eval_runtime": 41.486, |
|
"eval_samples_per_second": 121.945, |
|
"eval_steps_per_second": 30.492, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9067744486424743, |
|
"grad_norm": 0.669986367225647, |
|
"learning_rate": 3.779452462168126e-05, |
|
"loss": 0.101, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9370002635972234, |
|
"grad_norm": 0.9615168571472168, |
|
"learning_rate": 3.7172179572245585e-05, |
|
"loss": 0.1166, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.9672260785519726, |
|
"grad_norm": 13.886622428894043, |
|
"learning_rate": 3.6536521873485673e-05, |
|
"loss": 0.1308, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.9974518935067217, |
|
"grad_norm": 0.23971609771251678, |
|
"learning_rate": 3.588818888124863e-05, |
|
"loss": 0.1266, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 1.0276777084614708, |
|
"grad_norm": 4.089532375335693, |
|
"learning_rate": 3.5227830660536916e-05, |
|
"loss": 0.0844, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 1.05790352341622, |
|
"grad_norm": 6.12522554397583, |
|
"learning_rate": 3.455610933370663e-05, |
|
"loss": 0.0843, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.0881293383709691, |
|
"grad_norm": 0.24417218565940857, |
|
"learning_rate": 3.3873698416576314e-05, |
|
"loss": 0.0825, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.1183551533257183, |
|
"grad_norm": 5.748846054077148, |
|
"learning_rate": 3.3181282143111886e-05, |
|
"loss": 0.0885, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 1.1485809682804675, |
|
"grad_norm": 0.8179301619529724, |
|
"learning_rate": 3.247955477936471e-05, |
|
"loss": 0.0882, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 1.1788067832352165, |
|
"grad_norm": 0.9235413670539856, |
|
"learning_rate": 3.176921992735089e-05, |
|
"loss": 0.0859, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 1.2005974870398033, |
|
"eval_accuracy": 0.9756868946432101, |
|
"eval_loss": 0.09278497099876404, |
|
"eval_precision": 0.9550102249488752, |
|
"eval_runtime": 41.4152, |
|
"eval_samples_per_second": 122.153, |
|
"eval_steps_per_second": 30.544, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 1.2090325981899657, |
|
"grad_norm": 0.0459032878279686, |
|
"learning_rate": 3.1050989819569484e-05, |
|
"loss": 0.0603, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2392584131447149, |
|
"grad_norm": 2.4066104888916016, |
|
"learning_rate": 3.0325584604867403e-05, |
|
"loss": 0.071, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 1.269484228099464, |
|
"grad_norm": 9.008522987365723, |
|
"learning_rate": 2.9593731626366644e-05, |
|
"loss": 0.0678, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 1.2997100430542132, |
|
"grad_norm": 6.748093605041504, |
|
"learning_rate": 2.8856164692178087e-05, |
|
"loss": 0.1068, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 1.3299358580089624, |
|
"grad_norm": 0.8588669300079346, |
|
"learning_rate": 2.8113623339633016e-05, |
|
"loss": 0.0835, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 1.3601616729637114, |
|
"grad_norm": 2.512568235397339, |
|
"learning_rate": 2.736685209377016e-05, |
|
"loss": 0.0368, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.3903874879184606, |
|
"grad_norm": 3.670646905899048, |
|
"learning_rate": 2.661659972082166e-05, |
|
"loss": 0.0795, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 1.4206133028732097, |
|
"grad_norm": 0.5933941602706909, |
|
"learning_rate": 2.5863618477446485e-05, |
|
"loss": 0.0615, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 1.450839117827959, |
|
"grad_norm": 0.8403681516647339, |
|
"learning_rate": 2.5108663356464178e-05, |
|
"loss": 0.087, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 1.481064932782708, |
|
"grad_norm": 2.5098040103912354, |
|
"learning_rate": 2.4352491329845075e-05, |
|
"loss": 0.0821, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 1.500746858799754, |
|
"eval_accuracy": 0.9764775647361138, |
|
"eval_loss": 0.09056767076253891, |
|
"eval_precision": 0.935672514619883, |
|
"eval_runtime": 41.6907, |
|
"eval_samples_per_second": 121.346, |
|
"eval_steps_per_second": 30.342, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.511290747737457, |
|
"grad_norm": 8.705094337463379, |
|
"learning_rate": 2.3595860589716064e-05, |
|
"loss": 0.0714, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5415165626922063, |
|
"grad_norm": 0.13956770300865173, |
|
"learning_rate": 2.2839529788143032e-05, |
|
"loss": 0.0696, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 1.5717423776469555, |
|
"grad_norm": 5.737203121185303, |
|
"learning_rate": 2.208425727645198e-05, |
|
"loss": 0.088, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 1.6019681926017046, |
|
"grad_norm": 2.958165407180786, |
|
"learning_rate": 2.1330800344851852e-05, |
|
"loss": 0.0797, |
|
"step": 2279 |
|
}, |
|
{ |
|
"epoch": 1.6321940075564538, |
|
"grad_norm": 3.428828477859497, |
|
"learning_rate": 2.0579914463121218e-05, |
|
"loss": 0.0564, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 1.662419822511203, |
|
"grad_norm": 1.1754554510116577, |
|
"learning_rate": 1.983235252312024e-05, |
|
"loss": 0.0611, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.692645637465952, |
|
"grad_norm": 6.087173938751221, |
|
"learning_rate": 1.9088864083887505e-05, |
|
"loss": 0.0865, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 1.7228714524207012, |
|
"grad_norm": 3.829490900039673, |
|
"learning_rate": 1.835019462007857e-05, |
|
"loss": 0.0882, |
|
"step": 2451 |
|
}, |
|
{ |
|
"epoch": 1.7530972673754504, |
|
"grad_norm": 7.614952087402344, |
|
"learning_rate": 1.761708477449973e-05, |
|
"loss": 0.0445, |
|
"step": 2494 |
|
}, |
|
{ |
|
"epoch": 1.7833230823301993, |
|
"grad_norm": 35.23410415649414, |
|
"learning_rate": 1.689026961548663e-05, |
|
"loss": 0.0593, |
|
"step": 2537 |
|
}, |
|
{ |
|
"epoch": 1.800896230559705, |
|
"eval_accuracy": 0.9794425775845028, |
|
"eval_loss": 0.07280829548835754, |
|
"eval_precision": 0.9513406156901688, |
|
"eval_runtime": 41.7842, |
|
"eval_samples_per_second": 121.075, |
|
"eval_steps_per_second": 30.275, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 1.8135488972849485, |
|
"grad_norm": 0.42250823974609375, |
|
"learning_rate": 1.6170477899872236e-05, |
|
"loss": 0.0682, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8437747122396977, |
|
"grad_norm": 3.1888535022735596, |
|
"learning_rate": 1.5458431342283072e-05, |
|
"loss": 0.1021, |
|
"step": 2623 |
|
}, |
|
{ |
|
"epoch": 1.8740005271944469, |
|
"grad_norm": 0.11506301164627075, |
|
"learning_rate": 1.4754843891496589e-05, |
|
"loss": 0.0655, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 1.904226342149196, |
|
"grad_norm": 0.1821933388710022, |
|
"learning_rate": 1.4060421014585115e-05, |
|
"loss": 0.0718, |
|
"step": 2709 |
|
}, |
|
{ |
|
"epoch": 1.9344521571039452, |
|
"grad_norm": 4.954631805419922, |
|
"learning_rate": 1.3375858989564059e-05, |
|
"loss": 0.0727, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 1.9646779720586944, |
|
"grad_norm": 3.8650524616241455, |
|
"learning_rate": 1.270184420725387e-05, |
|
"loss": 0.0794, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.9949037870134436, |
|
"grad_norm": 1.1344525814056396, |
|
"learning_rate": 1.203905248305552e-05, |
|
"loss": 0.0832, |
|
"step": 2838 |
|
}, |
|
{ |
|
"epoch": 2.025129601968193, |
|
"grad_norm": 0.8010286688804626, |
|
"learning_rate": 1.1388148379329648e-05, |
|
"loss": 0.0455, |
|
"step": 2881 |
|
}, |
|
{ |
|
"epoch": 2.0553554169229415, |
|
"grad_norm": 0.04408084228634834, |
|
"learning_rate": 1.074978453905885e-05, |
|
"loss": 0.0328, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 2.0855812318776907, |
|
"grad_norm": 0.044715262949466705, |
|
"learning_rate": 1.0124601031461207e-05, |
|
"loss": 0.0527, |
|
"step": 2967 |
|
}, |
|
{ |
|
"epoch": 2.1010456023196555, |
|
"eval_accuracy": 0.9774659023522435, |
|
"eval_loss": 0.08738358318805695, |
|
"eval_precision": 0.9481555333998006, |
|
"eval_runtime": 41.7468, |
|
"eval_samples_per_second": 121.183, |
|
"eval_steps_per_second": 30.302, |
|
"step": 2989 |
|
}, |
|
{ |
|
"epoch": 2.11580704683244, |
|
"grad_norm": 0.9349909424781799, |
|
"learning_rate": 9.513224710211058e-06, |
|
"loss": 0.0401, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.146032861787189, |
|
"grad_norm": 9.170902252197266, |
|
"learning_rate": 8.91626858491075e-06, |
|
"loss": 0.0347, |
|
"step": 3053 |
|
}, |
|
{ |
|
"epoch": 2.1762586767419383, |
|
"grad_norm": 0.14816665649414062, |
|
"learning_rate": 8.334331206443409e-06, |
|
"loss": 0.0252, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 2.2064844916966875, |
|
"grad_norm": 0.42291557788848877, |
|
"learning_rate": 7.767996066822973e-06, |
|
"loss": 0.054, |
|
"step": 3139 |
|
}, |
|
{ |
|
"epoch": 2.2367103066514367, |
|
"grad_norm": 0.06259565055370331, |
|
"learning_rate": 7.2178310141434304e-06, |
|
"loss": 0.0534, |
|
"step": 3182 |
|
}, |
|
{ |
|
"epoch": 2.266936121606186, |
|
"grad_norm": 0.24870078265666962, |
|
"learning_rate": 6.684387683213754e-06, |
|
"loss": 0.0706, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.297161936560935, |
|
"grad_norm": 4.754918098449707, |
|
"learning_rate": 6.168200942449347e-06, |
|
"loss": 0.0439, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 2.3273877515156842, |
|
"grad_norm": 0.26877379417419434, |
|
"learning_rate": 5.669788357574799e-06, |
|
"loss": 0.0586, |
|
"step": 3311 |
|
}, |
|
{ |
|
"epoch": 2.357613566470433, |
|
"grad_norm": 0.19906023144721985, |
|
"learning_rate": 5.189649672675441e-06, |
|
"loss": 0.0324, |
|
"step": 3354 |
|
}, |
|
{ |
|
"epoch": 2.387839381425182, |
|
"grad_norm": 0.01543935015797615, |
|
"learning_rate": 4.728266309118228e-06, |
|
"loss": 0.0263, |
|
"step": 3397 |
|
}, |
|
{ |
|
"epoch": 2.4011949740796066, |
|
"eval_accuracy": 0.9794425775845028, |
|
"eval_loss": 0.0934256836771965, |
|
"eval_precision": 0.954045954045954, |
|
"eval_runtime": 41.7334, |
|
"eval_samples_per_second": 121.222, |
|
"eval_steps_per_second": 30.311, |
|
"step": 3416 |
|
}, |
|
{ |
|
"epoch": 2.4180651963799313, |
|
"grad_norm": 23.245332717895508, |
|
"learning_rate": 4.286100882844285e-06, |
|
"loss": 0.029, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4482910113346805, |
|
"grad_norm": 0.19245454668998718, |
|
"learning_rate": 3.863596740517041e-06, |
|
"loss": 0.0473, |
|
"step": 3483 |
|
}, |
|
{ |
|
"epoch": 2.4785168262894297, |
|
"grad_norm": 0.045882448554039, |
|
"learning_rate": 3.461177514991228e-06, |
|
"loss": 0.0393, |
|
"step": 3526 |
|
}, |
|
{ |
|
"epoch": 2.508742641244179, |
|
"grad_norm": 0.06130468472838402, |
|
"learning_rate": 3.079246700548261e-06, |
|
"loss": 0.0425, |
|
"step": 3569 |
|
}, |
|
{ |
|
"epoch": 2.538968456198928, |
|
"grad_norm": 0.34582972526550293, |
|
"learning_rate": 2.718187248324039e-06, |
|
"loss": 0.0349, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 2.5691942711536773, |
|
"grad_norm": 1.3404638767242432, |
|
"learning_rate": 2.37836118233478e-06, |
|
"loss": 0.0362, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 2.5994200861084265, |
|
"grad_norm": 0.06872310489416122, |
|
"learning_rate": 2.060109236485834e-06, |
|
"loss": 0.0612, |
|
"step": 3698 |
|
}, |
|
{ |
|
"epoch": 2.629645901063175, |
|
"grad_norm": 2.3359293937683105, |
|
"learning_rate": 1.7637505129275284e-06, |
|
"loss": 0.048, |
|
"step": 3741 |
|
}, |
|
{ |
|
"epoch": 2.659871716017925, |
|
"grad_norm": 5.89841365814209, |
|
"learning_rate": 1.489582162100559e-06, |
|
"loss": 0.0357, |
|
"step": 3784 |
|
}, |
|
{ |
|
"epoch": 2.6900975309726736, |
|
"grad_norm": 0.1834953874349594, |
|
"learning_rate": 1.2378790847916738e-06, |
|
"loss": 0.0566, |
|
"step": 3827 |
|
}, |
|
{ |
|
"epoch": 2.701344345839557, |
|
"eval_accuracy": 0.9788495750148251, |
|
"eval_loss": 0.08470374345779419, |
|
"eval_precision": 0.9548192771084337, |
|
"eval_runtime": 41.7346, |
|
"eval_samples_per_second": 121.218, |
|
"eval_steps_per_second": 30.311, |
|
"step": 3843 |
|
}, |
|
{ |
|
"epoch": 2.7203233459274228, |
|
"grad_norm": 6.001282215118408, |
|
"learning_rate": 1.008893656498535e-06, |
|
"loss": 0.0427, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.750549160882172, |
|
"grad_norm": 6.217470169067383, |
|
"learning_rate": 8.028554743799881e-07, |
|
"loss": 0.0367, |
|
"step": 3913 |
|
}, |
|
{ |
|
"epoch": 2.780774975836921, |
|
"grad_norm": 4.40556526184082, |
|
"learning_rate": 6.199711270455366e-07, |
|
"loss": 0.0317, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 2.8110007907916703, |
|
"grad_norm": 0.33150240778923035, |
|
"learning_rate": 4.604239874148275e-07, |
|
"loss": 0.0429, |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 2.8412266057464195, |
|
"grad_norm": 5.160287857055664, |
|
"learning_rate": 3.2437402885488323e-07, |
|
"loss": 0.0316, |
|
"step": 4042 |
|
}, |
|
{ |
|
"epoch": 2.8714524207011687, |
|
"grad_norm": 0.10348004102706909, |
|
"learning_rate": 2.1195766477933204e-07, |
|
"loss": 0.032, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 2.901678235655918, |
|
"grad_norm": 0.10884183645248413, |
|
"learning_rate": 1.2328761187059095e-07, |
|
"loss": 0.0151, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 2.931904050610667, |
|
"grad_norm": 5.601053714752197, |
|
"learning_rate": 5.8452777062061706e-08, |
|
"loss": 0.0296, |
|
"step": 4171 |
|
}, |
|
{ |
|
"epoch": 2.962129865565416, |
|
"grad_norm": 0.09085728973150253, |
|
"learning_rate": 1.7518168393672898e-08, |
|
"loss": 0.0384, |
|
"step": 4214 |
|
}, |
|
{ |
|
"epoch": 2.9923556805201654, |
|
"grad_norm": 0.4429589807987213, |
|
"learning_rate": 5.248298301814908e-10, |
|
"loss": 0.0467, |
|
"step": 4257 |
|
} |
|
], |
|
"logging_steps": 43, |
|
"max_steps": 4266, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.964461063575552e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|