{
  "best_metric": 0.8284289836883545,
  "best_model_checkpoint": "/output/longformer-large-4096-scratch-mlm-zgt-radpat/checkpoint-31300",
  "epoch": 9.999175145683829,
  "global_step": 32200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "eval_accuracy": 0.12266339212733007,
      "eval_loss": 6.975634574890137,
      "eval_runtime": 6116.0321,
      "eval_samples_per_second": 16.872,
      "eval_steps_per_second": 4.218,
      "step": 313
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.5527950310559007e-05,
      "loss": 7.8221,
      "step": 500
    },
    {
      "epoch": 0.19,
      "eval_accuracy": 0.15375473317214908,
      "eval_loss": 6.221883773803711,
      "eval_runtime": 6119.4463,
      "eval_samples_per_second": 16.862,
      "eval_steps_per_second": 4.216,
      "step": 626
    },
    {
      "epoch": 0.29,
      "eval_accuracy": 0.16615663705726413,
      "eval_loss": 6.070300102233887,
      "eval_runtime": 6123.864,
      "eval_samples_per_second": 16.85,
      "eval_steps_per_second": 4.213,
      "step": 939
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.1055900621118014e-05,
      "loss": 6.2078,
      "step": 1000
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.17121056433657572,
      "eval_loss": 5.859891414642334,
      "eval_runtime": 6127.1504,
      "eval_samples_per_second": 16.841,
      "eval_steps_per_second": 4.21,
      "step": 1252
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.658385093167702e-05,
      "loss": 5.8885,
      "step": 1500
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.2018564191205144,
      "eval_loss": 5.480071544647217,
      "eval_runtime": 6125.2208,
      "eval_samples_per_second": 16.846,
      "eval_steps_per_second": 4.212,
      "step": 1565
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.28117280447637577,
      "eval_loss": 4.865741729736328,
      "eval_runtime": 6125.2116,
      "eval_samples_per_second": 16.846,
      "eval_steps_per_second": 4.212,
      "step": 1878
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.211180124223603e-05,
      "loss": 5.222,
      "step": 2000
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.3429139079332977,
      "eval_loss": 4.355594158172607,
      "eval_runtime": 6130.4382,
      "eval_samples_per_second": 16.832,
      "eval_steps_per_second": 4.208,
      "step": 2191
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.763975155279503e-05,
      "loss": 4.4722,
      "step": 2500
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.40118303502111985,
      "eval_loss": 3.8668248653411865,
      "eval_runtime": 6127.9717,
      "eval_samples_per_second": 16.839,
      "eval_steps_per_second": 4.21,
      "step": 2504
    },
    {
      "epoch": 0.87,
      "eval_accuracy": 0.5023100325479124,
      "eval_loss": 3.0883595943450928,
      "eval_runtime": 6125.5473,
      "eval_samples_per_second": 16.845,
      "eval_steps_per_second": 4.211,
      "step": 2817
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.316770186335404e-05,
      "loss": 3.4756,
      "step": 3000
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.5704291572279797,
      "eval_loss": 2.500981569290161,
      "eval_runtime": 6130.9045,
      "eval_samples_per_second": 16.831,
      "eval_steps_per_second": 4.208,
      "step": 3130
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.6179390492627002,
      "eval_loss": 2.098602771759033,
      "eval_runtime": 6130.0855,
      "eval_samples_per_second": 16.833,
      "eval_steps_per_second": 4.208,
      "step": 3443
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.903381642512077e-05,
      "loss": 2.473,
      "step": 3500
    },
    {
      "epoch": 1.17,
      "eval_accuracy": 0.6461125725768823,
      "eval_loss": 1.8769867420196533,
      "eval_runtime": 6125.6206,
      "eval_samples_per_second": 16.845,
      "eval_steps_per_second": 4.211,
      "step": 3756
    },
    {
      "epoch": 1.24,
      "learning_rate": 9.730848861283644e-05,
      "loss": 1.9842,
      "step": 4000
    },
    {
      "epoch": 1.26,
      "eval_accuracy": 0.6658163018931873,
      "eval_loss": 1.7306807041168213,
      "eval_runtime": 6126.2778,
      "eval_samples_per_second": 16.843,
      "eval_steps_per_second": 4.211,
      "step": 4069
    },
    {
      "epoch": 1.36,
      "eval_accuracy": 0.6793036581603035,
      "eval_loss": 1.6312057971954346,
      "eval_runtime": 6129.7016,
      "eval_samples_per_second": 16.834,
      "eval_steps_per_second": 4.209,
      "step": 4382
    },
    {
      "epoch": 1.4,
      "learning_rate": 9.558316080055211e-05,
      "loss": 1.7588,
      "step": 4500
    },
    {
      "epoch": 1.46,
      "eval_accuracy": 0.6910826606245232,
      "eval_loss": 1.5486171245574951,
      "eval_runtime": 6126.9734,
      "eval_samples_per_second": 16.841,
      "eval_steps_per_second": 4.21,
      "step": 4695
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.385783298826778e-05,
      "loss": 1.6227,
      "step": 5000
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.7005288313548309,
      "eval_loss": 1.4852144718170166,
      "eval_runtime": 6125.5835,
      "eval_samples_per_second": 16.845,
      "eval_steps_per_second": 4.211,
      "step": 5008
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.7085754746399128,
      "eval_loss": 1.4299465417861938,
      "eval_runtime": 6127.5749,
      "eval_samples_per_second": 16.84,
      "eval_steps_per_second": 4.21,
      "step": 5321
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.213250517598345e-05,
      "loss": 1.5262,
      "step": 5500
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.714780717522465,
      "eval_loss": 1.3879172801971436,
      "eval_runtime": 6127.0567,
      "eval_samples_per_second": 16.841,
      "eval_steps_per_second": 4.21,
      "step": 5634
    },
    {
      "epoch": 1.85,
      "eval_accuracy": 0.7206512153561178,
      "eval_loss": 1.3517948389053345,
      "eval_runtime": 6124.3738,
      "eval_samples_per_second": 16.849,
      "eval_steps_per_second": 4.212,
      "step": 5947
    },
    {
      "epoch": 1.86,
      "learning_rate": 9.04071773636991e-05,
      "loss": 1.4504,
      "step": 6000
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.7259066376655939,
      "eval_loss": 1.3164656162261963,
      "eval_runtime": 6118.789,
      "eval_samples_per_second": 16.864,
      "eval_steps_per_second": 4.216,
      "step": 6260
    },
    {
      "epoch": 2.02,
      "learning_rate": 8.868184955141477e-05,
      "loss": 1.3953,
      "step": 6500
    },
    {
      "epoch": 2.04,
      "eval_accuracy": 0.730818673639555,
      "eval_loss": 1.285917043685913,
      "eval_runtime": 6120.6477,
      "eval_samples_per_second": 16.859,
      "eval_steps_per_second": 4.215,
      "step": 6573
    },
    {
      "epoch": 2.14,
      "eval_accuracy": 0.734559869460255,
      "eval_loss": 1.2613903284072876,
      "eval_runtime": 6121.6933,
      "eval_samples_per_second": 16.856,
      "eval_steps_per_second": 4.214,
      "step": 6886
    },
    {
      "epoch": 2.17,
      "learning_rate": 8.695652173913044e-05,
      "loss": 1.3444,
      "step": 7000
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.7384604131666711,
      "eval_loss": 1.236000895500183,
      "eval_runtime": 6122.4897,
      "eval_samples_per_second": 16.854,
      "eval_steps_per_second": 4.213,
      "step": 7199
    },
    {
      "epoch": 2.33,
      "learning_rate": 8.523119392684611e-05,
      "loss": 1.3047,
      "step": 7500
    },
    {
      "epoch": 2.33,
      "eval_accuracy": 0.7415128788148121,
      "eval_loss": 1.2168104648590088,
      "eval_runtime": 6126.7013,
      "eval_samples_per_second": 16.842,
      "eval_steps_per_second": 4.211,
      "step": 7512
    },
    {
      "epoch": 2.43,
      "eval_accuracy": 0.7450029545595697,
      "eval_loss": 1.1964406967163086,
      "eval_runtime": 6123.0501,
      "eval_samples_per_second": 16.852,
      "eval_steps_per_second": 4.213,
      "step": 7825
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.350586611456177e-05,
      "loss": 1.2713,
      "step": 8000
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.7467751766581295,
      "eval_loss": 1.1841331720352173,
      "eval_runtime": 6122.3999,
      "eval_samples_per_second": 16.854,
      "eval_steps_per_second": 4.214,
      "step": 8138
    },
    {
      "epoch": 2.62,
      "eval_accuracy": 0.750416850580808,
      "eval_loss": 1.1633927822113037,
      "eval_runtime": 6127.4486,
      "eval_samples_per_second": 16.84,
      "eval_steps_per_second": 4.21,
      "step": 8451
    },
    {
      "epoch": 2.64,
      "learning_rate": 8.178053830227743e-05,
      "loss": 1.2431,
      "step": 8500
    },
    {
      "epoch": 2.72,
      "eval_accuracy": 0.7527193981891372,
      "eval_loss": 1.146986722946167,
      "eval_runtime": 6131.9044,
      "eval_samples_per_second": 16.828,
      "eval_steps_per_second": 4.207,
      "step": 8764
    },
    {
      "epoch": 2.79,
      "learning_rate": 8.00552104899931e-05,
      "loss": 1.2164,
      "step": 9000
    },
    {
      "epoch": 2.82,
      "eval_accuracy": 0.7551538736391906,
      "eval_loss": 1.132608413696289,
      "eval_runtime": 6121.9035,
      "eval_samples_per_second": 16.855,
      "eval_steps_per_second": 4.214,
      "step": 9077
    },
    {
      "epoch": 2.92,
      "eval_accuracy": 0.7571211907517355,
      "eval_loss": 1.1203465461730957,
      "eval_runtime": 6121.527,
      "eval_samples_per_second": 16.856,
      "eval_steps_per_second": 4.214,
      "step": 9390
    },
    {
      "epoch": 2.95,
      "learning_rate": 7.832988267770877e-05,
      "loss": 1.1951,
      "step": 9500
    },
    {
      "epoch": 3.01,
      "eval_accuracy": 0.7589963980672606,
      "eval_loss": 1.1114239692687988,
      "eval_runtime": 6126.4612,
      "eval_samples_per_second": 16.843,
      "eval_steps_per_second": 4.211,
      "step": 9703
    },
    {
      "epoch": 3.11,
      "learning_rate": 7.660455486542444e-05,
      "loss": 1.1705,
      "step": 10000
    },
    {
      "epoch": 3.11,
      "eval_accuracy": 0.7612426818924412,
      "eval_loss": 1.0974253416061401,
      "eval_runtime": 6122.547,
      "eval_samples_per_second": 16.854,
      "eval_steps_per_second": 4.213,
      "step": 10016
    },
    {
      "epoch": 3.21,
      "eval_accuracy": 0.7631302412738202,
      "eval_loss": 1.0867012739181519,
      "eval_runtime": 6126.2709,
      "eval_samples_per_second": 16.843,
      "eval_steps_per_second": 4.211,
      "step": 10329
    },
    {
      "epoch": 3.26,
      "learning_rate": 7.48792270531401e-05,
      "loss": 1.1516,
      "step": 10500
    },
    {
      "epoch": 3.3,
      "eval_accuracy": 0.7646139267496522,
      "eval_loss": 1.0770790576934814,
      "eval_runtime": 6130.429,
      "eval_samples_per_second": 16.832,
      "eval_steps_per_second": 4.208,
      "step": 10642
    },
    {
      "epoch": 3.4,
      "eval_accuracy": 0.7660438596581639,
      "eval_loss": 1.0668072700500488,
      "eval_runtime": 6129.5434,
      "eval_samples_per_second": 16.834,
      "eval_steps_per_second": 4.209,
      "step": 10955
    },
    {
      "epoch": 3.42,
      "learning_rate": 7.315389924085577e-05,
      "loss": 1.1345,
      "step": 11000
    },
    {
      "epoch": 3.5,
      "eval_accuracy": 0.7675726293257004,
      "eval_loss": 1.05952787399292,
      "eval_runtime": 6126.4998,
      "eval_samples_per_second": 16.843,
      "eval_steps_per_second": 4.211,
      "step": 11268
    },
    {
      "epoch": 3.57,
      "learning_rate": 7.142857142857143e-05,
      "loss": 1.1192,
      "step": 11500
    },
    {
      "epoch": 3.6,
      "eval_accuracy": 0.7694602055551931,
      "eval_loss": 1.0479472875595093,
      "eval_runtime": 6127.4827,
      "eval_samples_per_second": 16.84,
      "eval_steps_per_second": 4.21,
      "step": 11581
    },
    {
      "epoch": 3.69,
      "eval_accuracy": 0.7707531140981431,
      "eval_loss": 1.0423223972320557,
      "eval_runtime": 6131.6585,
      "eval_samples_per_second": 16.829,
      "eval_steps_per_second": 4.207,
      "step": 11894
    },
    {
      "epoch": 3.73,
      "learning_rate": 6.970324361628709e-05,
      "loss": 1.106,
      "step": 12000
    },
    {
      "epoch": 3.79,
      "eval_accuracy": 0.7719773558500885,
      "eval_loss": 1.0328373908996582,
      "eval_runtime": 6128.1273,
      "eval_samples_per_second": 16.838,
      "eval_steps_per_second": 4.21,
      "step": 12207
    },
    {
      "epoch": 3.88,
      "learning_rate": 6.797791580400277e-05,
      "loss": 1.0916,
      "step": 12500
    },
    {
      "epoch": 3.89,
      "eval_accuracy": 0.7731614368018522,
      "eval_loss": 1.0272808074951172,
      "eval_runtime": 6120.3326,
      "eval_samples_per_second": 16.86,
      "eval_steps_per_second": 4.215,
      "step": 12520
    },
    {
      "epoch": 3.99,
      "eval_accuracy": 0.7742511503011699,
      "eval_loss": 1.0189120769500732,
      "eval_runtime": 6131.1757,
      "eval_samples_per_second": 16.83,
      "eval_steps_per_second": 4.208,
      "step": 12833
    },
    {
      "epoch": 4.04,
      "learning_rate": 6.625258799171843e-05,
      "loss": 1.0789,
      "step": 13000
    },
    {
      "epoch": 4.08,
      "eval_accuracy": 0.7757384860054987,
      "eval_loss": 1.0113306045532227,
      "eval_runtime": 6133.3354,
      "eval_samples_per_second": 16.824,
      "eval_steps_per_second": 4.206,
      "step": 13146
    },
    {
      "epoch": 4.18,
      "eval_accuracy": 0.776816006797112,
      "eval_loss": 1.0058414936065674,
      "eval_runtime": 6130.4902,
      "eval_samples_per_second": 16.832,
      "eval_steps_per_second": 4.208,
      "step": 13459
    },
    {
      "epoch": 4.19,
      "learning_rate": 6.45272601794341e-05,
      "loss": 1.0631,
      "step": 13500
    },
    {
      "epoch": 4.28,
      "eval_accuracy": 0.7777869709950421,
      "eval_loss": 1.000064730644226,
      "eval_runtime": 6129.8863,
      "eval_samples_per_second": 16.833,
      "eval_steps_per_second": 4.208,
      "step": 13772
    },
    {
      "epoch": 4.35,
      "learning_rate": 6.280193236714976e-05,
      "loss": 1.0557,
      "step": 14000
    },
    {
      "epoch": 4.37,
      "eval_accuracy": 0.778843659908514,
      "eval_loss": 0.993532121181488,
      "eval_runtime": 6126.5895,
      "eval_samples_per_second": 16.842,
      "eval_steps_per_second": 4.211,
      "step": 14085
    },
    {
      "epoch": 4.47,
      "eval_accuracy": 0.7797456195039035,
      "eval_loss": 0.9887062311172485,
      "eval_runtime": 6127.2121,
      "eval_samples_per_second": 16.841,
      "eval_steps_per_second": 4.21,
      "step": 14398
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.107660455486542e-05,
      "loss": 1.0438,
      "step": 14500
    },
    {
      "epoch": 4.57,
      "eval_accuracy": 0.7807731355140578,
      "eval_loss": 0.9825865030288696,
      "eval_runtime": 6126.4985,
      "eval_samples_per_second": 16.843,
      "eval_steps_per_second": 4.211,
      "step": 14711
    },
    {
      "epoch": 4.66,
      "learning_rate": 5.9351276742581096e-05,
      "loss": 1.0361,
      "step": 15000
    },
    {
      "epoch": 4.67,
      "eval_accuracy": 0.7818996676870377,
      "eval_loss": 0.9763655662536621,
      "eval_runtime": 6127.4496,
      "eval_samples_per_second": 16.84,
      "eval_steps_per_second": 4.21,
      "step": 15024
    },
    {
      "epoch": 4.76,
      "eval_accuracy": 0.782940768919716,
      "eval_loss": 0.9697893857955933,
      "eval_runtime": 6126.6806,
      "eval_samples_per_second": 16.842,
      "eval_steps_per_second": 4.211,
      "step": 15337
    },
    {
      "epoch": 4.81,
      "learning_rate": 5.762594893029676e-05,
      "loss": 1.0264,
      "step": 15500
    },
    {
      "epoch": 4.86,
      "eval_accuracy": 0.7841247808628483,
      "eval_loss": 0.9644368290901184,
      "eval_runtime": 6128.2176,
      "eval_samples_per_second": 16.838,
      "eval_steps_per_second": 4.21,
      "step": 15650
    },
    {
      "epoch": 4.96,
      "eval_accuracy": 0.7846301810721098,
      "eval_loss": 0.9614962339401245,
      "eval_runtime": 6132.4257,
      "eval_samples_per_second": 16.826,
      "eval_steps_per_second": 4.207,
      "step": 15963
    },
    {
      "epoch": 4.97,
      "learning_rate": 5.590062111801242e-05,
      "loss": 1.0176,
      "step": 16000
    },
    {
      "epoch": 5.05,
      "eval_accuracy": 0.7858738693048405,
      "eval_loss": 0.9536014795303345,
      "eval_runtime": 6134.6669,
      "eval_samples_per_second": 16.82,
      "eval_steps_per_second": 4.205,
      "step": 16276
    },
    {
      "epoch": 5.12,
      "learning_rate": 5.417529330572809e-05,
      "loss": 1.007,
      "step": 16500
    },
    {
      "epoch": 5.15,
      "eval_accuracy": 0.7867571419814423,
      "eval_loss": 0.9484899044036865,
      "eval_runtime": 6130.582,
      "eval_samples_per_second": 16.832,
      "eval_steps_per_second": 4.208,
      "step": 16589
    },
    {
      "epoch": 5.25,
      "eval_accuracy": 0.7867586112749555,
      "eval_loss": 0.9482876658439636,
      "eval_runtime": 6124.9513,
      "eval_samples_per_second": 16.847,
      "eval_steps_per_second": 4.212,
      "step": 16902
    },
    {
      "epoch": 5.28,
      "learning_rate": 5.244996549344375e-05,
      "loss": 0.9965,
      "step": 17000
    },
    {
      "epoch": 5.35,
      "eval_accuracy": 0.7880718537015102,
      "eval_loss": 0.9402521848678589,
      "eval_runtime": 6133.8455,
      "eval_samples_per_second": 16.823,
      "eval_steps_per_second": 4.206,
      "step": 17215
    },
    {
      "epoch": 5.43,
      "learning_rate": 5.072463768115943e-05,
      "loss": 0.9911,
      "step": 17500
    },
    {
      "epoch": 5.44,
      "eval_accuracy": 0.7888353320614213,
      "eval_loss": 0.9360187649726868,
      "eval_runtime": 6131.2854,
      "eval_samples_per_second": 16.83,
      "eval_steps_per_second": 4.207,
      "step": 17528
    },
    {
      "epoch": 5.54,
      "eval_accuracy": 0.7896846598862644,
      "eval_loss": 0.9315310120582581,
      "eval_runtime": 6130.221,
      "eval_samples_per_second": 16.833,
      "eval_steps_per_second": 4.208,
      "step": 17841
    },
    {
      "epoch": 5.59,
      "learning_rate": 4.899930986887509e-05,
      "loss": 0.9861,
      "step": 18000
    },
    {
      "epoch": 5.64,
      "eval_accuracy": 0.7902251551194575,
      "eval_loss": 0.9286208152770996,
      "eval_runtime": 6135.9888,
      "eval_samples_per_second": 16.817,
      "eval_steps_per_second": 4.204,
      "step": 18154
    },
    {
      "epoch": 5.73,
      "eval_accuracy": 0.7910160835517881,
      "eval_loss": 0.9242651462554932,
      "eval_runtime": 6134.4232,
      "eval_samples_per_second": 16.821,
      "eval_steps_per_second": 4.205,
      "step": 18467
    },
    {
      "epoch": 5.74,
      "learning_rate": 4.727398205659075e-05,
      "loss": 0.9787,
      "step": 18500
    },
    {
      "epoch": 5.83,
      "eval_accuracy": 0.7916774902149969,
      "eval_loss": 0.9199575185775757,
      "eval_runtime": 6127.6258,
      "eval_samples_per_second": 16.84,
      "eval_steps_per_second": 4.21,
      "step": 18780
    },
    {
      "epoch": 5.9,
      "learning_rate": 4.554865424430642e-05,
      "loss": 0.972,
      "step": 19000
    },
    {
      "epoch": 5.93,
      "eval_accuracy": 0.7921690081239334,
      "eval_loss": 0.9167630076408386,
      "eval_runtime": 6121.9416,
      "eval_samples_per_second": 16.855,
      "eval_steps_per_second": 4.214,
      "step": 19093
    },
    {
      "epoch": 6.03,
      "eval_accuracy": 0.7929045391491827,
      "eval_loss": 0.9131466150283813,
      "eval_runtime": 6136.433,
      "eval_samples_per_second": 16.815,
      "eval_steps_per_second": 4.204,
      "step": 19406
    },
    {
      "epoch": 6.06,
      "learning_rate": 4.382332643202209e-05,
      "loss": 0.9642,
      "step": 19500
    },
    {
      "epoch": 6.12,
      "eval_accuracy": 0.7933599893983608,
      "eval_loss": 0.9112694263458252,
      "eval_runtime": 6128.5893,
      "eval_samples_per_second": 16.837,
      "eval_steps_per_second": 4.209,
      "step": 19719
    },
    {
      "epoch": 6.21,
      "learning_rate": 4.209799861973775e-05,
      "loss": 0.9576,
      "step": 20000
    },
    {
      "epoch": 6.22,
      "eval_accuracy": 0.7940601199523715,
      "eval_loss": 0.9060889482498169,
      "eval_runtime": 6120.6148,
      "eval_samples_per_second": 16.859,
      "eval_steps_per_second": 4.215,
      "step": 20032
    },
    {
      "epoch": 6.32,
      "eval_accuracy": 0.7948685797545274,
      "eval_loss": 0.9030121564865112,
      "eval_runtime": 6124.1894,
      "eval_samples_per_second": 16.849,
      "eval_steps_per_second": 4.212,
      "step": 20345
    },
    {
      "epoch": 6.37,
      "learning_rate": 4.0372670807453414e-05,
      "loss": 0.9514,
      "step": 20500
    },
    {
      "epoch": 6.41,
      "eval_accuracy": 0.7954765058682228,
      "eval_loss": 0.8997820615768433,
      "eval_runtime": 6126.3307,
      "eval_samples_per_second": 16.843,
      "eval_steps_per_second": 4.211,
      "step": 20658
    },
    {
      "epoch": 6.51,
      "eval_accuracy": 0.7961196847197146,
      "eval_loss": 0.8957119584083557,
      "eval_runtime": 6121.3143,
      "eval_samples_per_second": 16.857,
      "eval_steps_per_second": 4.214,
      "step": 20971
    },
    {
      "epoch": 6.52,
      "learning_rate": 3.864734299516908e-05,
      "loss": 0.9457,
      "step": 21000
    },
    {
      "epoch": 6.61,
      "eval_accuracy": 0.7966353338873807,
      "eval_loss": 0.8925579190254211,
      "eval_runtime": 6121.7054,
      "eval_samples_per_second": 16.856,
      "eval_steps_per_second": 4.214,
      "step": 21284
    },
    {
      "epoch": 6.68,
      "learning_rate": 3.692201518288475e-05,
      "loss": 0.9411,
      "step": 21500
    },
    {
      "epoch": 6.71,
      "eval_accuracy": 0.7968278874690401,
      "eval_loss": 0.8926752805709839,
      "eval_runtime": 6123.2773,
      "eval_samples_per_second": 16.852,
      "eval_steps_per_second": 4.213,
      "step": 21597
    },
    {
      "epoch": 6.8,
      "eval_accuracy": 0.7974544355055755,
      "eval_loss": 0.8880347609519958,
      "eval_runtime": 6121.4872,
      "eval_samples_per_second": 16.857,
      "eval_steps_per_second": 4.214,
      "step": 21910
    },
    {
      "epoch": 6.83,
      "learning_rate": 3.519668737060042e-05,
      "loss": 0.9349,
      "step": 22000
    },
    {
      "epoch": 6.9,
      "eval_accuracy": 0.7982437294026129,
      "eval_loss": 0.8834199905395508,
      "eval_runtime": 6123.2699,
      "eval_samples_per_second": 16.852,
      "eval_steps_per_second": 4.213,
      "step": 22223
    },
    {
      "epoch": 6.99,
      "learning_rate": 3.347135955831608e-05,
      "loss": 0.9319,
      "step": 22500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7990805845521158,
      "eval_loss": 0.8799129724502563,
      "eval_runtime": 6120.2145,
      "eval_samples_per_second": 16.86,
      "eval_steps_per_second": 4.215,
      "step": 22536
    },
    {
      "epoch": 7.1,
      "eval_accuracy": 0.7991272231482186,
      "eval_loss": 0.879518449306488,
      "eval_runtime": 6125.0222,
      "eval_samples_per_second": 16.847,
      "eval_steps_per_second": 4.212,
      "step": 22849
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 0.9235,
      "step": 23000
    },
    {
      "epoch": 7.19,
      "eval_accuracy": 0.7999484030167242,
      "eval_loss": 0.8756560683250427,
      "eval_runtime": 6127.211,
      "eval_samples_per_second": 16.841,
      "eval_steps_per_second": 4.21,
      "step": 23162
    },
    {
      "epoch": 7.29,
      "eval_accuracy": 0.8001440250718516,
      "eval_loss": 0.8739376068115234,
      "eval_runtime": 6134.261,
      "eval_samples_per_second": 16.821,
      "eval_steps_per_second": 4.205,
      "step": 23475
    },
    {
      "epoch": 7.3,
      "learning_rate": 3.0020703933747414e-05,
      "loss": 0.9198,
      "step": 23500
    },
    {
      "epoch": 7.39,
      "eval_accuracy": 0.8010690824018636,
      "eval_loss": 0.8693613409996033,
      "eval_runtime": 6132.2846,
      "eval_samples_per_second": 16.827,
      "eval_steps_per_second": 4.207,
      "step": 23788
    },
    {
      "epoch": 7.45,
      "learning_rate": 2.829537612146308e-05,
      "loss": 0.9158,
      "step": 24000
    },
    {
      "epoch": 7.48,
      "eval_accuracy": 0.8011952977602468,
      "eval_loss": 0.8689371943473816,
      "eval_runtime": 6129.2095,
      "eval_samples_per_second": 16.835,
      "eval_steps_per_second": 4.209,
      "step": 24101
    },
    {
      "epoch": 7.58,
      "eval_accuracy": 0.8017360324487328,
      "eval_loss": 0.8663704991340637,
      "eval_runtime": 6128.5565,
      "eval_samples_per_second": 16.837,
      "eval_steps_per_second": 4.209,
      "step": 24414
    },
    {
      "epoch": 7.61,
      "learning_rate": 2.6570048309178748e-05,
      "loss": 0.9125,
      "step": 24500
    },
    {
      "epoch": 7.68,
      "eval_accuracy": 0.8020007406811046,
      "eval_loss": 0.8649431467056274,
      "eval_runtime": 6132.8666,
      "eval_samples_per_second": 16.825,
      "eval_steps_per_second": 4.206,
      "step": 24727
    },
    {
      "epoch": 7.76,
      "learning_rate": 2.484472049689441e-05,
      "loss": 0.9099,
      "step": 25000
    },
    {
      "epoch": 7.78,
      "eval_accuracy": 0.8026024276561983,
      "eval_loss": 0.8605436086654663,
      "eval_runtime": 6126.7586,
      "eval_samples_per_second": 16.842,
      "eval_steps_per_second": 4.211,
      "step": 25040
    },
    {
      "epoch": 7.87,
      "eval_accuracy": 0.80301129412462,
      "eval_loss": 0.8582573533058167,
      "eval_runtime": 6127.3341,
      "eval_samples_per_second": 16.84,
      "eval_steps_per_second": 4.21,
      "step": 25353
    },
    {
      "epoch": 7.92,
      "learning_rate": 2.311939268461008e-05,
      "loss": 0.9054,
      "step": 25500
    },
    {
      "epoch": 7.97,
      "eval_accuracy": 0.8034071794966846,
      "eval_loss": 0.8573377132415771,
      "eval_runtime": 6131.9465,
      "eval_samples_per_second": 16.828,
      "eval_steps_per_second": 4.207,
      "step": 25666
    },
    {
      "epoch": 8.07,
      "eval_accuracy": 0.8038572222331624,
      "eval_loss": 0.8544816374778748,
      "eval_runtime": 6128.9922,
      "eval_samples_per_second": 16.836,
      "eval_steps_per_second": 4.209,
      "step": 25979
    },
    {
      "epoch": 8.07,
      "learning_rate": 2.139406487232574e-05,
      "loss": 0.8998,
      "step": 26000
    },
    {
      "epoch": 8.16,
      "eval_accuracy": 0.8044058818938022,
      "eval_loss": 0.8519273400306702,
      "eval_runtime": 6124.6473,
      "eval_samples_per_second": 16.848,
      "eval_steps_per_second": 4.212,
      "step": 26292
    },
    {
      "epoch": 8.23,
      "learning_rate": 1.966873706004141e-05,
      "loss": 0.8939,
      "step": 26500
    },
    {
      "epoch": 8.26,
      "eval_accuracy": 0.8044216416179728,
      "eval_loss": 0.8512473702430725,
      "eval_runtime": 6126.8526,
      "eval_samples_per_second": 16.842,
      "eval_steps_per_second": 4.21,
      "step": 26605
    },
    {
      "epoch": 8.36,
      "eval_accuracy": 0.804752442721678,
      "eval_loss": 0.8492391705513,
      "eval_runtime": 6127.2647,
      "eval_samples_per_second": 16.841,
      "eval_steps_per_second": 4.21,
      "step": 26918
    },
    {
      "epoch": 8.38,
      "learning_rate": 1.7943409247757076e-05,
      "loss": 0.8942,
      "step": 27000
    },
    {
      "epoch": 8.46,
      "eval_accuracy": 0.8051816524786768,
      "eval_loss": 0.8468219637870789,
      "eval_runtime": 6124.9306,
      "eval_samples_per_second": 16.847,
      "eval_steps_per_second": 4.212,
      "step": 27231
    },
    {
      "epoch": 8.54,
      "learning_rate": 1.621808143547274e-05,
      "loss": 0.8904,
      "step": 27500
    },
    {
      "epoch": 8.55,
      "eval_accuracy": 0.8055019757141467,
      "eval_loss": 0.8458420634269714,
      "eval_runtime": 6124.8245,
      "eval_samples_per_second": 16.847,
      "eval_steps_per_second": 4.212,
      "step": 27544
    },
    {
      "epoch": 8.65,
      "eval_accuracy": 0.8057308816675628,
      "eval_loss": 0.8443206548690796,
      "eval_runtime": 6129.9291,
      "eval_samples_per_second": 16.833,
      "eval_steps_per_second": 4.208,
      "step": 27857
    },
    {
      "epoch": 8.69,
      "learning_rate": 1.4492753623188407e-05,
      "loss": 0.8862,
      "step": 28000
    },
    {
      "epoch": 8.75,
      "eval_accuracy": 0.805897348183967,
      "eval_loss": 0.843222439289093,
      "eval_runtime": 6128.1919,
      "eval_samples_per_second": 16.838,
      "eval_steps_per_second": 4.21,
      "step": 28170
    },
    {
      "epoch": 8.84,
      "eval_accuracy": 0.8064984673341041,
      "eval_loss": 0.84042888879776,
      "eval_runtime": 6116.6369,
      "eval_samples_per_second": 16.87,
      "eval_steps_per_second": 4.218,
      "step": 28483
    },
    {
      "epoch": 8.85,
      "learning_rate": 1.276742581090407e-05,
      "loss": 0.8842,
      "step": 28500
    },
    {
      "epoch": 8.94,
      "eval_accuracy": 0.806853518328651,
      "eval_loss": 0.8381487727165222,
      "eval_runtime": 6118.9718,
      "eval_samples_per_second": 16.863,
      "eval_steps_per_second": 4.216,
      "step": 28796
    },
    {
      "epoch": 9.01,
      "learning_rate": 1.1042097998619738e-05,
      "loss": 0.8812,
      "step": 29000
    },
    {
      "epoch": 9.04,
      "eval_accuracy": 0.8070338579198731,
      "eval_loss": 0.8374488353729248,
      "eval_runtime": 6118.7308,
      "eval_samples_per_second": 16.864,
      "eval_steps_per_second": 4.216,
      "step": 29109
    },
    {
      "epoch": 9.14,
      "eval_accuracy": 0.8068436046687713,
      "eval_loss": 0.8375363945960999,
      "eval_runtime": 6128.5918,
      "eval_samples_per_second": 16.837,
      "eval_steps_per_second": 4.209,
      "step": 29422
    },
    {
      "epoch": 9.16,
      "learning_rate": 9.316770186335403e-06,
      "loss": 0.8774,
      "step": 29500
    },
    {
      "epoch": 9.23,
      "eval_accuracy": 0.8077565106716271,
      "eval_loss": 0.8336867094039917,
      "eval_runtime": 6119.8095,
      "eval_samples_per_second": 16.861,
      "eval_steps_per_second": 4.215,
      "step": 29735
    },
    {
      "epoch": 9.32,
      "learning_rate": 7.591442374051071e-06,
      "loss": 0.8752,
      "step": 30000
    },
    {
      "epoch": 9.33,
      "eval_accuracy": 0.8081288482769053,
      "eval_loss": 0.8320378661155701,
      "eval_runtime": 6119.7341,
      "eval_samples_per_second": 16.861,
      "eval_steps_per_second": 4.215,
      "step": 30048
    },
    {
      "epoch": 9.43,
      "eval_accuracy": 0.8082356261550239,
      "eval_loss": 0.8310965299606323,
      "eval_runtime": 6119.6431,
      "eval_samples_per_second": 16.862,
      "eval_steps_per_second": 4.215,
      "step": 30361
    },
    {
      "epoch": 9.47,
      "learning_rate": 5.866114561766736e-06,
      "loss": 0.8732,
      "step": 30500
    },
    {
      "epoch": 9.53,
      "eval_accuracy": 0.8083999448820824,
      "eval_loss": 0.8303462266921997,
      "eval_runtime": 6118.4989,
      "eval_samples_per_second": 16.865,
      "eval_steps_per_second": 4.216,
      "step": 30674
    },
    {
      "epoch": 9.62,
      "eval_accuracy": 0.8084419046833061,
      "eval_loss": 0.8290849328041077,
      "eval_runtime": 6127.7892,
      "eval_samples_per_second": 16.839,
      "eval_steps_per_second": 4.21,
      "step": 30987
    },
    {
      "epoch": 9.63,
      "learning_rate": 4.140786749482402e-06,
      "loss": 0.8715,
      "step": 31000
    },
    {
      "epoch": 9.72,
      "eval_accuracy": 0.8088197529604327,
      "eval_loss": 0.8284289836883545,
      "eval_runtime": 6124.7156,
      "eval_samples_per_second": 16.848,
      "eval_steps_per_second": 4.212,
      "step": 31300
    },
    {
      "epoch": 9.78,
      "learning_rate": 2.4154589371980677e-06,
      "loss": 0.8705,
      "step": 31500
    },
    {
      "epoch": 9.82,
      "eval_accuracy": 0.8085015827448934,
      "eval_loss": 0.8298270106315613,
      "eval_runtime": 6120.6207,
      "eval_samples_per_second": 16.859,
      "eval_steps_per_second": 4.215,
      "step": 31613
    },
    {
      "epoch": 9.91,
      "eval_accuracy": 0.8086080278025564,
      "eval_loss": 0.8285703659057617,
      "eval_runtime": 6122.3492,
      "eval_samples_per_second": 16.854,
      "eval_steps_per_second": 4.214,
      "step": 31926
    },
    {
      "epoch": 9.94,
      "learning_rate": 6.901311249137336e-07,
      "loss": 0.8676,
      "step": 32000
    },
    {
      "epoch": 10.0,
      "step": 32200,
      "total_flos": 9.597056792179405e+18,
      "train_loss": 1.5035098029663845,
      "train_runtime": 2595975.6547,
      "train_samples_per_second": 3.176,
      "train_steps_per_second": 0.012
    }
  ],
  "max_steps": 32200,
  "num_train_epochs": 10,
  "total_flos": 9.597056792179405e+18,
  "trial_name": null,
  "trial_params": null
}