{
  "best_metric": 0.7846992611885071,
  "best_model_checkpoint": "/output/zgt-roberta-large-finetuned-Gu21schedule-BS256-10ep/checkpoint-31926",
  "epoch": 9.99912662907937,
  "global_step": 32200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.1, "eval_accuracy": 0.12095168799057326, "eval_loss": 6.969939231872559, "eval_runtime": 4175.606, "eval_samples_per_second": 24.712, "eval_steps_per_second": 3.089, "step": 313},
    {"epoch": 0.16, "learning_rate": 1.5527950310559007e-05, "loss": 7.8141, "step": 500},
    {"epoch": 0.19, "eval_accuracy": 0.1524057089518688, "eval_loss": 6.233978271484375, "eval_runtime": 4177.4061, "eval_samples_per_second": 24.701, "eval_steps_per_second": 3.088, "step": 626},
    {"epoch": 0.29, "eval_accuracy": 0.16394060788435189, "eval_loss": 6.122518539428711, "eval_runtime": 4179.7801, "eval_samples_per_second": 24.687, "eval_steps_per_second": 3.086, "step": 939},
    {"epoch": 0.31, "learning_rate": 3.1055900621118014e-05, "loss": 6.1501, "step": 1000},
    {"epoch": 0.39, "eval_accuracy": 0.16842972332423103, "eval_loss": 6.045305252075195, "eval_runtime": 4182.1677, "eval_samples_per_second": 24.673, "eval_steps_per_second": 3.084, "step": 1252},
    {"epoch": 0.47, "learning_rate": 4.658385093167702e-05, "loss": 6.0737, "step": 1500},
    {"epoch": 0.49, "eval_accuracy": 0.16725142510331475, "eval_loss": 5.987020969390869, "eval_runtime": 4177.226, "eval_samples_per_second": 24.702, "eval_steps_per_second": 3.088, "step": 1565},
    {"epoch": 0.58, "eval_accuracy": 0.17195286451581493, "eval_loss": 5.9314141273498535, "eval_runtime": 4165.2923, "eval_samples_per_second": 24.773, "eval_steps_per_second": 3.097, "step": 1878},
    {"epoch": 0.62, "learning_rate": 6.211180124223603e-05, "loss": 5.9753, "step": 2000},
    {"epoch": 0.68, "eval_accuracy": 0.17336381929439706, "eval_loss": 5.887312412261963, "eval_runtime": 4179.8742, "eval_samples_per_second": 24.687, "eval_steps_per_second": 3.086, "step": 2191},
    {"epoch": 0.78, "learning_rate": 7.763975155279503e-05, "loss": 5.9008, "step": 2500},
    {"epoch": 0.78, "eval_accuracy": 0.1737213537890283, "eval_loss": 5.851128101348877, "eval_runtime": 4181.0014, "eval_samples_per_second": 24.68, "eval_steps_per_second": 3.085, "step": 2504},
    {"epoch": 0.87, "eval_accuracy": 0.1730111369884377, "eval_loss": 5.819331645965576, "eval_runtime": 4182.8162, "eval_samples_per_second": 24.669, "eval_steps_per_second": 3.084, "step": 2817},
    {"epoch": 0.93, "learning_rate": 9.316770186335404e-05, "loss": 5.8379, "step": 3000},
    {"epoch": 0.97, "eval_accuracy": 0.17577215201939525, "eval_loss": 5.785292625427246, "eval_runtime": 4184.6612, "eval_samples_per_second": 24.658, "eval_steps_per_second": 3.082, "step": 3130},
    {"epoch": 1.07, "eval_accuracy": 0.1759501659892642, "eval_loss": 5.762609481811523, "eval_runtime": 4182.8465, "eval_samples_per_second": 24.669, "eval_steps_per_second": 3.084, "step": 3443},
    {"epoch": 1.09, "learning_rate": 9.903381642512077e-05, "loss": 5.7885, "step": 3500},
    {"epoch": 1.17, "eval_accuracy": 0.17622262356212584, "eval_loss": 5.739773273468018, "eval_runtime": 4170.2068, "eval_samples_per_second": 24.744, "eval_steps_per_second": 3.093, "step": 3756},
    {"epoch": 1.24, "learning_rate": 9.730848861283644e-05, "loss": 5.7464, "step": 4000},
    {"epoch": 1.26, "eval_accuracy": 0.1766708743216783, "eval_loss": 5.720291614532471, "eval_runtime": 4172.2987, "eval_samples_per_second": 24.731, "eval_steps_per_second": 3.092, "step": 4069},
    {"epoch": 1.36, "eval_accuracy": 0.17642257605364067, "eval_loss": 5.696093559265137, "eval_runtime": 4172.9563, "eval_samples_per_second": 24.728, "eval_steps_per_second": 3.091, "step": 4382},
    {"epoch": 1.4, "learning_rate": 9.558316080055211e-05, "loss": 5.7149, "step": 4500},
    {"epoch": 1.46, "eval_accuracy": 0.17737878845804067, "eval_loss": 5.683297634124756, "eval_runtime": 4185.0558, "eval_samples_per_second": 24.656, "eval_steps_per_second": 3.082, "step": 4695},
    {"epoch": 1.55, "learning_rate": 9.385783298826778e-05, "loss": 5.6885, "step": 5000},
    {"epoch": 1.56, "eval_accuracy": 0.1773234355905989, "eval_loss": 5.670944690704346, "eval_runtime": 4184.0795, "eval_samples_per_second": 24.662, "eval_steps_per_second": 3.083, "step": 5008},
    {"epoch": 1.65, "eval_accuracy": 0.1774909217337557, "eval_loss": 5.6566619873046875, "eval_runtime": 4185.5299, "eval_samples_per_second": 24.653, "eval_steps_per_second": 3.082, "step": 5321},
    {"epoch": 1.71, "learning_rate": 9.213250517598345e-05, "loss": 5.666, "step": 5500},
    {"epoch": 1.75, "eval_accuracy": 0.17712345558640866, "eval_loss": 5.647720813751221, "eval_runtime": 4178.6309, "eval_samples_per_second": 24.694, "eval_steps_per_second": 3.087, "step": 5634},
    {"epoch": 1.85, "eval_accuracy": 0.1778983102903151, "eval_loss": 5.632648944854736, "eval_runtime": 4179.5279, "eval_samples_per_second": 24.689, "eval_steps_per_second": 3.086, "step": 5947},
    {"epoch": 1.86, "learning_rate": 9.04071773636991e-05, "loss": 5.6458, "step": 6000},
    {"epoch": 1.94, "eval_accuracy": 0.1767171879623295, "eval_loss": 5.626438140869141, "eval_runtime": 4166.358, "eval_samples_per_second": 24.767, "eval_steps_per_second": 3.096, "step": 6260},
    {"epoch": 2.02, "learning_rate": 8.868184955141477e-05, "loss": 5.6293, "step": 6500},
    {"epoch": 2.04, "eval_accuracy": 0.18033812690505616, "eval_loss": 5.371770858764648, "eval_runtime": 4187.0608, "eval_samples_per_second": 24.644, "eval_steps_per_second": 3.081, "step": 6573},
    {"epoch": 2.14, "eval_accuracy": 0.22889979190190532, "eval_loss": 4.608296871185303, "eval_runtime": 4185.2213, "eval_samples_per_second": 24.655, "eval_steps_per_second": 3.082, "step": 6886},
    {"epoch": 2.17, "learning_rate": 8.695652173913044e-05, "loss": 4.9883, "step": 7000},
    {"epoch": 2.24, "eval_accuracy": 0.4614915191453084, "eval_loss": 3.1735970973968506, "eval_runtime": 4184.0355, "eval_samples_per_second": 24.662, "eval_steps_per_second": 3.083, "step": 7199},
    {"epoch": 2.33, "learning_rate": 8.523119392684611e-05, "loss": 3.2514, "step": 7500},
    {"epoch": 2.33, "eval_accuracy": 0.5874539351791631, "eval_loss": 2.3033511638641357, "eval_runtime": 4180.5231, "eval_samples_per_second": 24.683, "eval_steps_per_second": 3.085, "step": 7512},
    {"epoch": 2.43, "eval_accuracy": 0.6407011451986726, "eval_loss": 1.915069818496704, "eval_runtime": 4169.2257, "eval_samples_per_second": 24.75, "eval_steps_per_second": 3.094, "step": 7825},
    {"epoch": 2.48, "learning_rate": 8.350586611456177e-05, "loss": 2.1211, "step": 8000},
    {"epoch": 2.53, "eval_accuracy": 0.6671589447070213, "eval_loss": 1.7218379974365234, "eval_runtime": 4169.0733, "eval_samples_per_second": 24.751, "eval_steps_per_second": 3.094, "step": 8138},
    {"epoch": 2.62, "eval_accuracy": 0.6849309492119486, "eval_loss": 1.5953431129455566, "eval_runtime": 4186.0587, "eval_samples_per_second": 24.65, "eval_steps_per_second": 3.081, "step": 8451},
    {"epoch": 2.64, "learning_rate": 8.178053830227743e-05, "loss": 1.7698, "step": 8500},
    {"epoch": 2.72, "eval_accuracy": 0.6977890826334601, "eval_loss": 1.5040490627288818, "eval_runtime": 4187.545, "eval_samples_per_second": 24.641, "eval_steps_per_second": 3.08, "step": 8764},
    {"epoch": 2.79, "learning_rate": 8.00552104899931e-05, "loss": 1.5907, "step": 9000},
    {"epoch": 2.82, "eval_accuracy": 0.7089186346832345, "eval_loss": 1.4300199747085571, "eval_runtime": 4186.4554, "eval_samples_per_second": 24.648, "eval_steps_per_second": 3.081, "step": 9077},
    {"epoch": 2.92, "eval_accuracy": 0.716823811317197, "eval_loss": 1.3781534433364868, "eval_runtime": 4180.8901, "eval_samples_per_second": 24.681, "eval_steps_per_second": 3.085, "step": 9390},
    {"epoch": 2.95, "learning_rate": 7.832988267770877e-05, "loss": 1.4757, "step": 9500},
    {"epoch": 3.01, "eval_accuracy": 0.7239590766169688, "eval_loss": 1.3298745155334473, "eval_runtime": 4169.3417, "eval_samples_per_second": 24.749, "eval_steps_per_second": 3.094, "step": 9703},
    {"epoch": 3.11, "learning_rate": 7.660455486542444e-05, "loss": 1.3919, "step": 10000},
    {"epoch": 3.11, "eval_accuracy": 0.7302072364465025, "eval_loss": 1.2895771265029907, "eval_runtime": 4181.4693, "eval_samples_per_second": 24.677, "eval_steps_per_second": 3.085, "step": 10016},
    {"epoch": 3.21, "eval_accuracy": 0.7356055265010495, "eval_loss": 1.25456964969635, "eval_runtime": 4169.8649, "eval_samples_per_second": 24.746, "eval_steps_per_second": 3.093, "step": 10329},
    {"epoch": 3.26, "learning_rate": 7.48792270531401e-05, "loss": 1.328, "step": 10500},
    {"epoch": 3.3, "eval_accuracy": 0.7406319445625766, "eval_loss": 1.2240813970565796, "eval_runtime": 4170.5091, "eval_samples_per_second": 24.742, "eval_steps_per_second": 3.093, "step": 10642},
    {"epoch": 3.4, "eval_accuracy": 0.745277920243707, "eval_loss": 1.1944962739944458, "eval_runtime": 4174.7942, "eval_samples_per_second": 24.717, "eval_steps_per_second": 3.09, "step": 10955},
    {"epoch": 3.42, "learning_rate": 7.315389924085577e-05, "loss": 1.2782, "step": 11000},
    {"epoch": 3.5, "eval_accuracy": 0.7494278825432492, "eval_loss": 1.1713906526565552, "eval_runtime": 4174.964, "eval_samples_per_second": 24.716, "eval_steps_per_second": 3.09, "step": 11268},
    {"epoch": 3.57, "learning_rate": 7.142857142857143e-05, "loss": 1.2357, "step": 11500},
    {"epoch": 3.6, "eval_accuracy": 0.7529051534363226, "eval_loss": 1.1493370532989502, "eval_runtime": 4169.6928, "eval_samples_per_second": 24.747, "eval_steps_per_second": 3.094, "step": 11581},
    {"epoch": 3.69, "eval_accuracy": 0.7561643316896146, "eval_loss": 1.1291333436965942, "eval_runtime": 4170.8848, "eval_samples_per_second": 24.74, "eval_steps_per_second": 3.093, "step": 11894},
    {"epoch": 3.73, "learning_rate": 6.970324361628709e-05, "loss": 1.1986, "step": 12000},
    {"epoch": 3.79, "eval_accuracy": 0.7596194578598549, "eval_loss": 1.1110583543777466, "eval_runtime": 4174.4741, "eval_samples_per_second": 24.719, "eval_steps_per_second": 3.09, "step": 12207},
    {"epoch": 3.88, "learning_rate": 6.797791580400277e-05, "loss": 1.1673, "step": 12500},
    {"epoch": 3.89, "eval_accuracy": 0.7632696453538697, "eval_loss": 1.091315507888794, "eval_runtime": 4175.4221, "eval_samples_per_second": 24.713, "eval_steps_per_second": 3.089, "step": 12520},
    {"epoch": 3.99, "eval_accuracy": 0.7654154836189226, "eval_loss": 1.0766664743423462, "eval_runtime": 4173.9192, "eval_samples_per_second": 24.722, "eval_steps_per_second": 3.09, "step": 12833},
    {"epoch": 4.04, "learning_rate": 6.625258799171843e-05, "loss": 1.1387, "step": 13000},
    {"epoch": 4.08, "eval_accuracy": 0.7681167597878354, "eval_loss": 1.0629887580871582, "eval_runtime": 4175.5718, "eval_samples_per_second": 24.712, "eval_steps_per_second": 3.089, "step": 13146},
    {"epoch": 4.18, "eval_accuracy": 0.7707155612474973, "eval_loss": 1.046230673789978, "eval_runtime": 4175.7461, "eval_samples_per_second": 24.711, "eval_steps_per_second": 3.089, "step": 13459},
    {"epoch": 4.19, "learning_rate": 6.45272601794341e-05, "loss": 1.1074, "step": 13500},
    {"epoch": 4.28, "eval_accuracy": 0.7731418474891986, "eval_loss": 1.0337274074554443, "eval_runtime": 4174.5, "eval_samples_per_second": 24.718, "eval_steps_per_second": 3.09, "step": 13772},
    {"epoch": 4.35, "learning_rate": 6.280193236714976e-05, "loss": 1.0893, "step": 14000},
    {"epoch": 4.37, "eval_accuracy": 0.7748741121676294, "eval_loss": 1.0239219665527344, "eval_runtime": 4175.3624, "eval_samples_per_second": 24.713, "eval_steps_per_second": 3.089, "step": 14085},
    {"epoch": 4.47, "eval_accuracy": 0.7766543779252701, "eval_loss": 1.014146089553833, "eval_runtime": 4181.6475, "eval_samples_per_second": 24.676, "eval_steps_per_second": 3.085, "step": 14398},
    {"epoch": 4.5, "learning_rate": 6.107660455486542e-05, "loss": 1.0682, "step": 14500},
    {"epoch": 4.57, "eval_accuracy": 0.7785186060053325, "eval_loss": 1.0032246112823486, "eval_runtime": 4180.7422, "eval_samples_per_second": 24.682, "eval_steps_per_second": 3.085, "step": 14711},
    {"epoch": 4.66, "learning_rate": 5.9351276742581096e-05, "loss": 1.0524, "step": 15000},
    {"epoch": 4.67, "eval_accuracy": 0.7806327231861181, "eval_loss": 0.9907068014144897, "eval_runtime": 4180.8503, "eval_samples_per_second": 24.681, "eval_steps_per_second": 3.085, "step": 15024},
    {"epoch": 4.76, "eval_accuracy": 0.7826726152932884, "eval_loss": 0.9796966910362244, "eval_runtime": 4176.9966, "eval_samples_per_second": 24.704, "eval_steps_per_second": 3.088, "step": 15337},
    {"epoch": 4.81, "learning_rate": 5.762594893029676e-05, "loss": 1.0338, "step": 15500},
    {"epoch": 4.86, "eval_accuracy": 0.7838565333062765, "eval_loss": 0.9712271690368652, "eval_runtime": 4181.1547, "eval_samples_per_second": 24.679, "eval_steps_per_second": 3.085, "step": 15650},
    {"epoch": 4.96, "eval_accuracy": 0.7854880736072989, "eval_loss": 0.9626355767250061, "eval_runtime": 4176.3487, "eval_samples_per_second": 24.707, "eval_steps_per_second": 3.089, "step": 15963},
    {"epoch": 4.97, "learning_rate": 5.590062111801242e-05, "loss": 1.0188, "step": 16000},
    {"epoch": 5.05, "eval_accuracy": 0.7867994298371893, "eval_loss": 0.9558107256889343, "eval_runtime": 4179.0241, "eval_samples_per_second": 24.692, "eval_steps_per_second": 3.087, "step": 16276},
    {"epoch": 5.12, "learning_rate": 5.417529330572809e-05, "loss": 1.003, "step": 16500},
    {"epoch": 5.15, "eval_accuracy": 0.7884956846420655, "eval_loss": 0.9470182657241821, "eval_runtime": 4176.2829, "eval_samples_per_second": 24.708, "eval_steps_per_second": 3.089, "step": 16589},
    {"epoch": 5.25, "eval_accuracy": 0.789320227965163, "eval_loss": 0.9419927597045898, "eval_runtime": 4176.4362, "eval_samples_per_second": 24.707, "eval_steps_per_second": 3.089, "step": 16902},
    {"epoch": 5.28, "learning_rate": 5.244996549344375e-05, "loss": 0.989, "step": 17000},
    {"epoch": 5.35, "eval_accuracy": 0.7911368944368907, "eval_loss": 0.9319880604743958, "eval_runtime": 4173.211, "eval_samples_per_second": 24.726, "eval_steps_per_second": 3.091, "step": 17215},
    {"epoch": 5.43, "learning_rate": 5.072463768115943e-05, "loss": 0.9786, "step": 17500},
    {"epoch": 5.44, "eval_accuracy": 0.7913904624057875, "eval_loss": 0.9292972683906555, "eval_runtime": 4172.1427, "eval_samples_per_second": 24.732, "eval_steps_per_second": 3.092, "step": 17528},
    {"epoch": 5.54, "eval_accuracy": 0.7932201930680667, "eval_loss": 0.9203895330429077, "eval_runtime": 4172.1933, "eval_samples_per_second": 24.732, "eval_steps_per_second": 3.092, "step": 17841},
    {"epoch": 5.59, "learning_rate": 4.899930986887509e-05, "loss": 0.9688, "step": 18000},
    {"epoch": 5.64, "eval_accuracy": 0.7938784128863696, "eval_loss": 0.9155610799789429, "eval_runtime": 4171.4989, "eval_samples_per_second": 24.736, "eval_steps_per_second": 3.092, "step": 18154},
    {"epoch": 5.73, "eval_accuracy": 0.7952068414816341, "eval_loss": 0.9091127514839172, "eval_runtime": 4177.1927, "eval_samples_per_second": 24.702, "eval_steps_per_second": 3.088, "step": 18467},
    {"epoch": 5.74, "learning_rate": 4.727398205659075e-05, "loss": 0.9576, "step": 18500},
    {"epoch": 5.83, "eval_accuracy": 0.7959312809475213, "eval_loss": 0.9035018682479858, "eval_runtime": 4178.199, "eval_samples_per_second": 24.697, "eval_steps_per_second": 3.087, "step": 18780},
    {"epoch": 5.9, "learning_rate": 4.554865424430642e-05, "loss": 0.9489, "step": 19000},
    {"epoch": 5.93, "eval_accuracy": 0.7968186229313537, "eval_loss": 0.8974489569664001, "eval_runtime": 4175.3753, "eval_samples_per_second": 24.713, "eval_steps_per_second": 3.089, "step": 19093},
    {"epoch": 6.03, "eval_accuracy": 0.7980068894657456, "eval_loss": 0.8927856087684631, "eval_runtime": 4174.5551, "eval_samples_per_second": 24.718, "eval_steps_per_second": 3.09, "step": 19406},
    {"epoch": 6.06, "learning_rate": 4.382332643202209e-05, "loss": 0.9384, "step": 19500},
    {"epoch": 6.12, "eval_accuracy": 0.7989207834455775, "eval_loss": 0.8878790140151978, "eval_runtime": 4176.0286, "eval_samples_per_second": 24.709, "eval_steps_per_second": 3.089, "step": 19719},
    {"epoch": 6.21, "learning_rate": 4.209799861973775e-05, "loss": 0.9292, "step": 20000},
    {"epoch": 6.22, "eval_accuracy": 0.7995861067424526, "eval_loss": 0.8841228485107422, "eval_runtime": 4174.1874, "eval_samples_per_second": 24.72, "eval_steps_per_second": 3.09, "step": 20032},
    {"epoch": 6.32, "eval_accuracy": 0.8005012964274263, "eval_loss": 0.8798208236694336, "eval_runtime": 4177.291, "eval_samples_per_second": 24.702, "eval_steps_per_second": 3.088, "step": 20345},
    {"epoch": 6.37, "learning_rate": 4.0372670807453414e-05, "loss": 0.921, "step": 20500},
    {"epoch": 6.41, "eval_accuracy": 0.8013221542890863, "eval_loss": 0.8738238215446472, "eval_runtime": 4179.3089, "eval_samples_per_second": 24.69, "eval_steps_per_second": 3.086, "step": 20658},
    {"epoch": 6.51, "eval_accuracy": 0.8020292962858104, "eval_loss": 0.8702828288078308, "eval_runtime": 4183.644, "eval_samples_per_second": 24.664, "eval_steps_per_second": 3.083, "step": 20971},
    {"epoch": 6.52, "learning_rate": 3.864734299516908e-05, "loss": 0.9142, "step": 21000},
    {"epoch": 6.61, "eval_accuracy": 0.8031108659339165, "eval_loss": 0.8653974533081055, "eval_runtime": 4177.1307, "eval_samples_per_second": 24.703, "eval_steps_per_second": 3.088, "step": 21284},
    {"epoch": 6.68, "learning_rate": 3.692201518288475e-05, "loss": 0.9059, "step": 21500},
    {"epoch": 6.71, "eval_accuracy": 0.803630800811672, "eval_loss": 0.8622693419456482, "eval_runtime": 4177.9531, "eval_samples_per_second": 24.698, "eval_steps_per_second": 3.087, "step": 21597},
    {"epoch": 6.8, "eval_accuracy": 0.8044488106784644, "eval_loss": 0.8577073812484741, "eval_runtime": 4179.042, "eval_samples_per_second": 24.692, "eval_steps_per_second": 3.087, "step": 21910},
    {"epoch": 6.83, "learning_rate": 3.519668737060042e-05, "loss": 0.9, "step": 22000},
    {"epoch": 6.9, "eval_accuracy": 0.8051737023740322, "eval_loss": 0.8543989062309265, "eval_runtime": 4184.397, "eval_samples_per_second": 24.66, "eval_steps_per_second": 3.083, "step": 22223},
    {"epoch": 6.99, "learning_rate": 3.347135955831608e-05, "loss": 0.8938, "step": 22500},
    {"epoch": 7.0, "eval_accuracy": 0.8057043385627138, "eval_loss": 0.8503552079200745, "eval_runtime": 4183.5003, "eval_samples_per_second": 24.665, "eval_steps_per_second": 3.083, "step": 22536},
    {"epoch": 7.1, "eval_accuracy": 0.8066939882795919, "eval_loss": 0.8455684781074524, "eval_runtime": 4184.5066, "eval_samples_per_second": 24.659, "eval_steps_per_second": 3.083, "step": 22849},
    {"epoch": 7.14, "learning_rate": 3.1746031746031745e-05, "loss": 0.8863, "step": 23000},
    {"epoch": 7.19, "eval_accuracy": 0.8068218962399752, "eval_loss": 0.8439931273460388, "eval_runtime": 4178.7482, "eval_samples_per_second": 24.693, "eval_steps_per_second": 3.087, "step": 23162},
    {"epoch": 7.29, "eval_accuracy": 0.8075830543867027, "eval_loss": 0.8405274152755737, "eval_runtime": 4175.082, "eval_samples_per_second": 24.715, "eval_steps_per_second": 3.09, "step": 23475},
    {"epoch": 7.3, "learning_rate": 3.0020703933747414e-05, "loss": 0.8804, "step": 23500},
    {"epoch": 7.39, "eval_accuracy": 0.8083236702044754, "eval_loss": 0.8358407616615295, "eval_runtime": 4173.8003, "eval_samples_per_second": 24.723, "eval_steps_per_second": 3.09, "step": 23788},
    {"epoch": 7.45, "learning_rate": 2.829537612146308e-05, "loss": 0.8743, "step": 24000},
    {"epoch": 7.48, "eval_accuracy": 0.8087474534271941, "eval_loss": 0.8344011306762695, "eval_runtime": 4174.341, "eval_samples_per_second": 24.719, "eval_steps_per_second": 3.09, "step": 24101},
    {"epoch": 7.58, "eval_accuracy": 0.8092334825727566, "eval_loss": 0.8311472535133362, "eval_runtime": 4174.6403, "eval_samples_per_second": 24.718, "eval_steps_per_second": 3.09, "step": 24414},
    {"epoch": 7.61, "learning_rate": 2.6570048309178748e-05, "loss": 0.8706, "step": 24500},
    {"epoch": 7.68, "eval_accuracy": 0.8097197803323306, "eval_loss": 0.8285520672798157, "eval_runtime": 4174.1958, "eval_samples_per_second": 24.72, "eval_steps_per_second": 3.09, "step": 24727},
    {"epoch": 7.76, "learning_rate": 2.484472049689441e-05, "loss": 0.8645, "step": 25000},
    {"epoch": 7.78, "eval_accuracy": 0.810216617587347, "eval_loss": 0.8253086805343628, "eval_runtime": 4174.8315, "eval_samples_per_second": 24.716, "eval_steps_per_second": 3.09, "step": 25040},
    {"epoch": 7.87, "eval_accuracy": 0.8111007973238641, "eval_loss": 0.8211511373519897, "eval_runtime": 4173.759, "eval_samples_per_second": 24.723, "eval_steps_per_second": 3.09, "step": 25353},
    {"epoch": 7.92, "learning_rate": 2.311939268461008e-05, "loss": 0.8602, "step": 25500},
    {"epoch": 7.97, "eval_accuracy": 0.8111680475751303, "eval_loss": 0.8205570578575134, "eval_runtime": 4174.1596, "eval_samples_per_second": 24.72, "eval_steps_per_second": 3.09, "step": 25666},
    {"epoch": 8.07, "eval_accuracy": 0.8120529685373788, "eval_loss": 0.8158560991287231, "eval_runtime": 4175.0585, "eval_samples_per_second": 24.715, "eval_steps_per_second": 3.09, "step": 25979},
    {"epoch": 8.07, "learning_rate": 2.139406487232574e-05, "loss": 0.8538, "step": 26000},
    {"epoch": 8.16, "eval_accuracy": 0.8122099938117417, "eval_loss": 0.814548134803772, "eval_runtime": 4175.6451, "eval_samples_per_second": 24.712, "eval_steps_per_second": 3.089, "step": 26292},
    {"epoch": 8.23, "learning_rate": 1.966873706004141e-05, "loss": 0.8482, "step": 26500},
    {"epoch": 8.26, "eval_accuracy": 0.8130589470835723, "eval_loss": 0.8115074634552002, "eval_runtime": 4173.7083, "eval_samples_per_second": 24.723, "eval_steps_per_second": 3.091, "step": 26605},
    {"epoch": 8.36, "eval_accuracy": 0.8134356257072347, "eval_loss": 0.8089998364448547, "eval_runtime": 4174.6745, "eval_samples_per_second": 24.717, "eval_steps_per_second": 3.09, "step": 26918},
    {"epoch": 8.38, "learning_rate": 1.7943409247757076e-05, "loss": 0.8488, "step": 27000},
    {"epoch": 8.46, "eval_accuracy": 0.8134433401076823, "eval_loss": 0.8088431358337402, "eval_runtime": 4174.8389, "eval_samples_per_second": 24.716, "eval_steps_per_second": 3.09, "step": 27231},
    {"epoch": 8.54, "learning_rate": 1.621808143547274e-05, "loss": 0.8423, "step": 27500},
    {"epoch": 8.55, "eval_accuracy": 0.8137619275792424, "eval_loss": 0.8057170510292053, "eval_runtime": 4176.6965, "eval_samples_per_second": 24.705, "eval_steps_per_second": 3.088, "step": 27544},
    {"epoch": 8.65, "eval_accuracy": 0.8144356879652452, "eval_loss": 0.8039630651473999, "eval_runtime": 4175.7093, "eval_samples_per_second": 24.711, "eval_steps_per_second": 3.089, "step": 27857},
    {"epoch": 8.69, "learning_rate": 1.4492753623188407e-05, "loss": 0.8396, "step": 28000},
    {"epoch": 8.75, "eval_accuracy": 0.8145382455874637, "eval_loss": 0.8026472926139832, "eval_runtime": 4180.1354, "eval_samples_per_second": 24.685, "eval_steps_per_second": 3.086, "step": 28170},
    {"epoch": 8.84, "eval_accuracy": 0.8152832629079341, "eval_loss": 0.7988529205322266, "eval_runtime": 4179.9334, "eval_samples_per_second": 24.686, "eval_steps_per_second": 3.086, "step": 28483},
    {"epoch": 8.85, "learning_rate": 1.276742581090407e-05, "loss": 0.8377, "step": 28500},
    {"epoch": 8.94, "eval_accuracy": 0.8155252212471398, "eval_loss": 0.7979427576065063, "eval_runtime": 4178.4294, "eval_samples_per_second": 24.695, "eval_steps_per_second": 3.087, "step": 28796},
    {"epoch": 9.01, "learning_rate": 1.1042097998619738e-05, "loss": 0.8319, "step": 29000},
    {"epoch": 9.04, "eval_accuracy": 0.8159056442908116, "eval_loss": 0.7969533801078796, "eval_runtime": 4174.229, "eval_samples_per_second": 24.72, "eval_steps_per_second": 3.09, "step": 29109},
    {"epoch": 9.14, "eval_accuracy": 0.816157387607754, "eval_loss": 0.7945725917816162, "eval_runtime": 4172.4325, "eval_samples_per_second": 24.731, "eval_steps_per_second": 3.091, "step": 29422},
    {"epoch": 9.16, "learning_rate": 9.316770186335403e-06, "loss": 0.8262, "step": 29500},
    {"epoch": 9.23, "eval_accuracy": 0.8161445368180659, "eval_loss": 0.7940195202827454, "eval_runtime": 4171.791, "eval_samples_per_second": 24.734, "eval_steps_per_second": 3.092, "step": 29735},
    {"epoch": 9.32, "learning_rate": 7.591442374051071e-06, "loss": 0.8255, "step": 30000},
    {"epoch": 9.33, "eval_accuracy": 0.8167732822776922, "eval_loss": 0.7918646931648254, "eval_runtime": 4178.9618, "eval_samples_per_second": 24.692, "eval_steps_per_second": 3.087, "step": 30048},
    {"epoch": 9.43, "eval_accuracy": 0.8168878384273093, "eval_loss": 0.7914555072784424, "eval_runtime": 4176.3937, "eval_samples_per_second": 24.707, "eval_steps_per_second": 3.089, "step": 30361},
    {"epoch": 9.47, "learning_rate": 5.866114561766736e-06, "loss": 0.824, "step": 30500},
    {"epoch": 9.53, "eval_accuracy": 0.817306118390898, "eval_loss": 0.7896197438240051, "eval_runtime": 4176.9927, "eval_samples_per_second": 24.704, "eval_steps_per_second": 3.088, "step": 30674},
    {"epoch": 9.62, "eval_accuracy": 0.8175610442353789, "eval_loss": 0.7872260212898254, "eval_runtime": 4173.6592, "eval_samples_per_second": 24.723, "eval_steps_per_second": 3.091, "step": 30987},
    {"epoch": 9.63, "learning_rate": 4.140786749482402e-06, "loss": 0.8218, "step": 31000},
    {"epoch": 9.72, "eval_accuracy": 0.8176242146063594, "eval_loss": 0.7876725792884827, "eval_runtime": 4174.4186, "eval_samples_per_second": 24.719, "eval_steps_per_second": 3.09, "step": 31300},
    {"epoch": 9.78, "learning_rate": 2.4154589371980677e-06, "loss": 0.8204, "step": 31500},
    {"epoch": 9.82, "eval_accuracy": 0.8176433057924909, "eval_loss": 0.7873775959014893, "eval_runtime": 4173.4522, "eval_samples_per_second": 24.725, "eval_steps_per_second": 3.091, "step": 31613},
    {"epoch": 9.91, "eval_accuracy": 0.8180999846708549, "eval_loss": 0.7846992611885071, "eval_runtime": 4174.7361, "eval_samples_per_second": 24.717, "eval_steps_per_second": 3.09, "step": 31926},
    {"epoch": 9.94, "learning_rate": 6.901311249137336e-07, "loss": 0.8177, "step": 32000},
    {"epoch": 10.0, "step": 32200, "total_flos": 7.683327485823698e+18, "train_loss": 1.915723304985473, "train_runtime": 1372706.2896, "train_samples_per_second": 6.006, "train_steps_per_second": 0.023}
  ],
  "max_steps": 32200,
  "num_train_epochs": 10,
  "total_flos": 7.683327485823698e+18,
  "trial_name": null,
  "trial_params": null
}