|
{ |
|
"best_metric": 0.6948868632316589, |
|
"best_model_checkpoint": "/output/zgt-roberta-base-finetuned-Gu21schedule-BS256-10ep/checkpoint-31926", |
|
"epoch": 9.999029597282872, |
|
"global_step": 32200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.15792088093844922, |
|
"eval_loss": 6.227477550506592, |
|
"eval_runtime": 1564.1073, |
|
"eval_samples_per_second": 65.972, |
|
"eval_steps_per_second": 4.124, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.316770186335403e-05, |
|
"loss": 7.1011, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.17130737364600243, |
|
"eval_loss": 6.035614967346191, |
|
"eval_runtime": 1566.1128, |
|
"eval_samples_per_second": 65.887, |
|
"eval_steps_per_second": 4.118, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.15421966595333422, |
|
"eval_loss": 5.970716953277588, |
|
"eval_runtime": 1567.0923, |
|
"eval_samples_per_second": 65.846, |
|
"eval_steps_per_second": 4.116, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00018633540372670805, |
|
"loss": 6.0182, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.17392094665213684, |
|
"eval_loss": 5.876271724700928, |
|
"eval_runtime": 1568.897, |
|
"eval_samples_per_second": 65.77, |
|
"eval_steps_per_second": 4.111, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002795031055900621, |
|
"loss": 5.8908, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.17560553688955177, |
|
"eval_loss": 5.8265557289123535, |
|
"eval_runtime": 1564.0767, |
|
"eval_samples_per_second": 65.973, |
|
"eval_steps_per_second": 4.124, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.1755758633601614, |
|
"eval_loss": 5.785298824310303, |
|
"eval_runtime": 1563.8448, |
|
"eval_samples_per_second": 65.983, |
|
"eval_steps_per_second": 4.124, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003726708074534161, |
|
"loss": 5.8105, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.1851031730080446, |
|
"eval_loss": 5.230679988861084, |
|
"eval_runtime": 1563.8141, |
|
"eval_samples_per_second": 65.984, |
|
"eval_steps_per_second": 4.125, |
|
"step": 2191 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004658385093167702, |
|
"loss": 4.8469, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.49302250475931464, |
|
"eval_loss": 2.994337320327759, |
|
"eval_runtime": 1564.1944, |
|
"eval_samples_per_second": 65.968, |
|
"eval_steps_per_second": 4.124, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.6404133912717029, |
|
"eval_loss": 1.8836123943328857, |
|
"eval_runtime": 1565.3247, |
|
"eval_samples_per_second": 65.921, |
|
"eval_steps_per_second": 4.121, |
|
"step": 2817 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0005590062111801242, |
|
"loss": 2.2431, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.6801897739457411, |
|
"eval_loss": 1.6012158393859863, |
|
"eval_runtime": 1567.0654, |
|
"eval_samples_per_second": 65.847, |
|
"eval_steps_per_second": 4.116, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.7009936840521221, |
|
"eval_loss": 1.4589687585830688, |
|
"eval_runtime": 1567.9702, |
|
"eval_samples_per_second": 65.809, |
|
"eval_steps_per_second": 4.114, |
|
"step": 3443 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0005942028985507245, |
|
"loss": 1.618, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.7143850078266079, |
|
"eval_loss": 1.3708570003509521, |
|
"eval_runtime": 1567.3868, |
|
"eval_samples_per_second": 65.834, |
|
"eval_steps_per_second": 4.115, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0005838509316770186, |
|
"loss": 1.436, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.725278557944805, |
|
"eval_loss": 1.2998367547988892, |
|
"eval_runtime": 1564.9997, |
|
"eval_samples_per_second": 65.934, |
|
"eval_steps_per_second": 4.121, |
|
"step": 4069 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.7341513279268731, |
|
"eval_loss": 1.2506020069122314, |
|
"eval_runtime": 1565.2355, |
|
"eval_samples_per_second": 65.924, |
|
"eval_steps_per_second": 4.121, |
|
"step": 4382 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0005734989648033125, |
|
"loss": 1.3344, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.7414229730121367, |
|
"eval_loss": 1.2084842920303345, |
|
"eval_runtime": 1564.6672, |
|
"eval_samples_per_second": 65.948, |
|
"eval_steps_per_second": 4.122, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0005631469979296066, |
|
"loss": 1.2678, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.7460624134488423, |
|
"eval_loss": 1.1799527406692505, |
|
"eval_runtime": 1565.6909, |
|
"eval_samples_per_second": 65.905, |
|
"eval_steps_per_second": 4.12, |
|
"step": 5008 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_accuracy": 0.7513162543657942, |
|
"eval_loss": 1.1497138738632202, |
|
"eval_runtime": 1565.9769, |
|
"eval_samples_per_second": 65.893, |
|
"eval_steps_per_second": 4.119, |
|
"step": 5321 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0005527950310559006, |
|
"loss": 1.2183, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.7555471802775164, |
|
"eval_loss": 1.1244266033172607, |
|
"eval_runtime": 1565.313, |
|
"eval_samples_per_second": 65.921, |
|
"eval_steps_per_second": 4.121, |
|
"step": 5634 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.7586796535932161, |
|
"eval_loss": 1.1065826416015625, |
|
"eval_runtime": 1565.5955, |
|
"eval_samples_per_second": 65.909, |
|
"eval_steps_per_second": 4.12, |
|
"step": 5947 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0005424430641821946, |
|
"loss": 1.1764, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.7621753158952852, |
|
"eval_loss": 1.0877457857131958, |
|
"eval_runtime": 1565.477, |
|
"eval_samples_per_second": 65.914, |
|
"eval_steps_per_second": 4.12, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0005320910973084886, |
|
"loss": 1.1481, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.7653308256390273, |
|
"eval_loss": 1.069921851158142, |
|
"eval_runtime": 1565.5751, |
|
"eval_samples_per_second": 65.91, |
|
"eval_steps_per_second": 4.12, |
|
"step": 6573 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7669783269978627, |
|
"eval_loss": 1.059142827987671, |
|
"eval_runtime": 1564.7195, |
|
"eval_samples_per_second": 65.946, |
|
"eval_steps_per_second": 4.122, |
|
"step": 6886 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0005217391304347826, |
|
"loss": 1.1169, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.7701243077960133, |
|
"eval_loss": 1.0415377616882324, |
|
"eval_runtime": 1565.3407, |
|
"eval_samples_per_second": 65.92, |
|
"eval_steps_per_second": 4.121, |
|
"step": 7199 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0005113871635610765, |
|
"loss": 1.0953, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.7718881473585557, |
|
"eval_loss": 1.0299146175384521, |
|
"eval_runtime": 1565.1494, |
|
"eval_samples_per_second": 65.928, |
|
"eval_steps_per_second": 4.121, |
|
"step": 7512 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_accuracy": 0.7742740416938212, |
|
"eval_loss": 1.0169405937194824, |
|
"eval_runtime": 1565.1719, |
|
"eval_samples_per_second": 65.927, |
|
"eval_steps_per_second": 4.121, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0005010351966873705, |
|
"loss": 1.0743, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.7762836729268914, |
|
"eval_loss": 1.006744146347046, |
|
"eval_runtime": 1565.0712, |
|
"eval_samples_per_second": 65.931, |
|
"eval_steps_per_second": 4.121, |
|
"step": 8138 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.7780289191145969, |
|
"eval_loss": 0.9963796734809875, |
|
"eval_runtime": 1564.7218, |
|
"eval_samples_per_second": 65.946, |
|
"eval_steps_per_second": 4.122, |
|
"step": 8451 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0004906832298136645, |
|
"loss": 1.0582, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.7799966088129119, |
|
"eval_loss": 0.9861342310905457, |
|
"eval_runtime": 1564.6716, |
|
"eval_samples_per_second": 65.948, |
|
"eval_steps_per_second": 4.122, |
|
"step": 8764 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.00048033126293995856, |
|
"loss": 1.0401, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.7818533899689635, |
|
"eval_loss": 0.9751449227333069, |
|
"eval_runtime": 1564.9804, |
|
"eval_samples_per_second": 65.935, |
|
"eval_steps_per_second": 4.121, |
|
"step": 9077 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_accuracy": 0.7830731640909905, |
|
"eval_loss": 0.9680520296096802, |
|
"eval_runtime": 1565.4431, |
|
"eval_samples_per_second": 65.916, |
|
"eval_steps_per_second": 4.12, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.00046997929606625255, |
|
"loss": 1.0262, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.7843024373004699, |
|
"eval_loss": 0.9606424570083618, |
|
"eval_runtime": 1565.1725, |
|
"eval_samples_per_second": 65.927, |
|
"eval_steps_per_second": 4.121, |
|
"step": 9703 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.0004596273291925466, |
|
"loss": 1.0093, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_accuracy": 0.7858115501363246, |
|
"eval_loss": 0.9535852670669556, |
|
"eval_runtime": 1564.8587, |
|
"eval_samples_per_second": 65.94, |
|
"eval_steps_per_second": 4.122, |
|
"step": 10016 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.7874158219284931, |
|
"eval_loss": 0.9444663524627686, |
|
"eval_runtime": 1564.7558, |
|
"eval_samples_per_second": 65.944, |
|
"eval_steps_per_second": 4.122, |
|
"step": 10329 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0004492753623188405, |
|
"loss": 0.995, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.7884454282264995, |
|
"eval_loss": 0.9367947578430176, |
|
"eval_runtime": 1565.5731, |
|
"eval_samples_per_second": 65.91, |
|
"eval_steps_per_second": 4.12, |
|
"step": 10642 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.7902011668190658, |
|
"eval_loss": 0.9284105896949768, |
|
"eval_runtime": 1564.8586, |
|
"eval_samples_per_second": 65.94, |
|
"eval_steps_per_second": 4.122, |
|
"step": 10955 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.0004389233954451345, |
|
"loss": 0.983, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.7912236011775762, |
|
"eval_loss": 0.9227182269096375, |
|
"eval_runtime": 1564.9848, |
|
"eval_samples_per_second": 65.935, |
|
"eval_steps_per_second": 4.121, |
|
"step": 11268 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.00042857142857142855, |
|
"loss": 0.9728, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.7926902278847275, |
|
"eval_loss": 0.9144140481948853, |
|
"eval_runtime": 1564.605, |
|
"eval_samples_per_second": 65.951, |
|
"eval_steps_per_second": 4.122, |
|
"step": 11581 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.7936642140619866, |
|
"eval_loss": 0.9096552133560181, |
|
"eval_runtime": 1565.1262, |
|
"eval_samples_per_second": 65.929, |
|
"eval_steps_per_second": 4.121, |
|
"step": 11894 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00041821946169772254, |
|
"loss": 0.9632, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.7947595817774112, |
|
"eval_loss": 0.9028408527374268, |
|
"eval_runtime": 1564.7786, |
|
"eval_samples_per_second": 65.944, |
|
"eval_steps_per_second": 4.122, |
|
"step": 12207 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0004078674948240166, |
|
"loss": 0.953, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.7958614481801236, |
|
"eval_loss": 0.896992564201355, |
|
"eval_runtime": 1564.1136, |
|
"eval_samples_per_second": 65.972, |
|
"eval_steps_per_second": 4.124, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.796848649990296, |
|
"eval_loss": 0.8908724188804626, |
|
"eval_runtime": 1564.2284, |
|
"eval_samples_per_second": 65.967, |
|
"eval_steps_per_second": 4.123, |
|
"step": 12833 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0003975155279503105, |
|
"loss": 0.9414, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.797744672086327, |
|
"eval_loss": 0.8873794674873352, |
|
"eval_runtime": 1565.739, |
|
"eval_samples_per_second": 65.903, |
|
"eval_steps_per_second": 4.119, |
|
"step": 13146 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_accuracy": 0.7986036425092956, |
|
"eval_loss": 0.881266713142395, |
|
"eval_runtime": 1565.4321, |
|
"eval_samples_per_second": 65.916, |
|
"eval_steps_per_second": 4.12, |
|
"step": 13459 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0003871635610766045, |
|
"loss": 0.9287, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.7999304297565917, |
|
"eval_loss": 0.8743026852607727, |
|
"eval_runtime": 1564.903, |
|
"eval_samples_per_second": 65.938, |
|
"eval_steps_per_second": 4.122, |
|
"step": 13772 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00037681159420289854, |
|
"loss": 0.9219, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_accuracy": 0.80051802299604, |
|
"eval_loss": 0.8717477917671204, |
|
"eval_runtime": 1565.3161, |
|
"eval_samples_per_second": 65.921, |
|
"eval_steps_per_second": 4.121, |
|
"step": 14085 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_accuracy": 0.801263244223999, |
|
"eval_loss": 0.8662565350532532, |
|
"eval_runtime": 1564.6984, |
|
"eval_samples_per_second": 65.947, |
|
"eval_steps_per_second": 4.122, |
|
"step": 14398 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0003664596273291925, |
|
"loss": 0.9137, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_accuracy": 0.8024215648096065, |
|
"eval_loss": 0.8607039451599121, |
|
"eval_runtime": 1565.2999, |
|
"eval_samples_per_second": 65.922, |
|
"eval_steps_per_second": 4.121, |
|
"step": 14711 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0003561076604554865, |
|
"loss": 0.9062, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_accuracy": 0.80312043845701, |
|
"eval_loss": 0.8565826416015625, |
|
"eval_runtime": 1566.6554, |
|
"eval_samples_per_second": 65.865, |
|
"eval_steps_per_second": 4.117, |
|
"step": 15024 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_accuracy": 0.8044941042086672, |
|
"eval_loss": 0.8497495055198669, |
|
"eval_runtime": 1566.3382, |
|
"eval_samples_per_second": 65.878, |
|
"eval_steps_per_second": 4.118, |
|
"step": 15337 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0003457556935817805, |
|
"loss": 0.8978, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_accuracy": 0.8053608800858211, |
|
"eval_loss": 0.8438310623168945, |
|
"eval_runtime": 1565.4433, |
|
"eval_samples_per_second": 65.916, |
|
"eval_steps_per_second": 4.12, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_accuracy": 0.8064097453094475, |
|
"eval_loss": 0.8402115702629089, |
|
"eval_runtime": 1565.8792, |
|
"eval_samples_per_second": 65.897, |
|
"eval_steps_per_second": 4.119, |
|
"step": 15963 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0003354037267080745, |
|
"loss": 0.8907, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_accuracy": 0.8072085264677659, |
|
"eval_loss": 0.8345613479614258, |
|
"eval_runtime": 1565.4826, |
|
"eval_samples_per_second": 65.914, |
|
"eval_steps_per_second": 4.12, |
|
"step": 16276 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0003250517598343685, |
|
"loss": 0.8791, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_accuracy": 0.8079710854147522, |
|
"eval_loss": 0.8311530947685242, |
|
"eval_runtime": 1565.3772, |
|
"eval_samples_per_second": 65.918, |
|
"eval_steps_per_second": 4.12, |
|
"step": 16589 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_accuracy": 0.808113472692542, |
|
"eval_loss": 0.8312503695487976, |
|
"eval_runtime": 1564.9419, |
|
"eval_samples_per_second": 65.937, |
|
"eval_steps_per_second": 4.122, |
|
"step": 16902 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00031469979296066245, |
|
"loss": 0.8713, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.8089870028151737, |
|
"eval_loss": 0.8246920108795166, |
|
"eval_runtime": 1566.1653, |
|
"eval_samples_per_second": 65.885, |
|
"eval_steps_per_second": 4.118, |
|
"step": 17215 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0003043478260869565, |
|
"loss": 0.866, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_accuracy": 0.8100817167380647, |
|
"eval_loss": 0.8203573822975159, |
|
"eval_runtime": 1563.7507, |
|
"eval_samples_per_second": 65.987, |
|
"eval_steps_per_second": 4.125, |
|
"step": 17528 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_accuracy": 0.8106388454645975, |
|
"eval_loss": 0.8163631558418274, |
|
"eval_runtime": 1563.2024, |
|
"eval_samples_per_second": 66.01, |
|
"eval_steps_per_second": 4.126, |
|
"step": 17841 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0002939958592132505, |
|
"loss": 0.8607, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.8114071774300123, |
|
"eval_loss": 0.8129469752311707, |
|
"eval_runtime": 1563.1261, |
|
"eval_samples_per_second": 66.013, |
|
"eval_steps_per_second": 4.126, |
|
"step": 18154 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_accuracy": 0.8122062364179543, |
|
"eval_loss": 0.808627724647522, |
|
"eval_runtime": 1564.3551, |
|
"eval_samples_per_second": 65.961, |
|
"eval_steps_per_second": 4.123, |
|
"step": 18467 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00028364389233954447, |
|
"loss": 0.8534, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_accuracy": 0.8128903362394657, |
|
"eval_loss": 0.8055641651153564, |
|
"eval_runtime": 1563.7512, |
|
"eval_samples_per_second": 65.987, |
|
"eval_steps_per_second": 4.125, |
|
"step": 18780 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.0002732919254658385, |
|
"loss": 0.8448, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_accuracy": 0.8134494722363886, |
|
"eval_loss": 0.8019941449165344, |
|
"eval_runtime": 1563.2984, |
|
"eval_samples_per_second": 66.006, |
|
"eval_steps_per_second": 4.126, |
|
"step": 19093 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_accuracy": 0.8143192139194421, |
|
"eval_loss": 0.7982867956161499, |
|
"eval_runtime": 1563.6693, |
|
"eval_samples_per_second": 65.99, |
|
"eval_steps_per_second": 4.125, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0002629399585921325, |
|
"loss": 0.8391, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_accuracy": 0.8147886522353383, |
|
"eval_loss": 0.7949500679969788, |
|
"eval_runtime": 1563.7961, |
|
"eval_samples_per_second": 65.985, |
|
"eval_steps_per_second": 4.125, |
|
"step": 19719 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0002525879917184265, |
|
"loss": 0.8304, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_accuracy": 0.8155781682195781, |
|
"eval_loss": 0.7908634543418884, |
|
"eval_runtime": 1563.2059, |
|
"eval_samples_per_second": 66.01, |
|
"eval_steps_per_second": 4.126, |
|
"step": 20032 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_accuracy": 0.816149684044946, |
|
"eval_loss": 0.7888238430023193, |
|
"eval_runtime": 1565.7848, |
|
"eval_samples_per_second": 65.901, |
|
"eval_steps_per_second": 4.119, |
|
"step": 20345 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00024223602484472047, |
|
"loss": 0.825, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_accuracy": 0.8167946967773272, |
|
"eval_loss": 0.7858228087425232, |
|
"eval_runtime": 1565.9773, |
|
"eval_samples_per_second": 65.893, |
|
"eval_steps_per_second": 4.119, |
|
"step": 20658 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_accuracy": 0.8174543786608812, |
|
"eval_loss": 0.7798792123794556, |
|
"eval_runtime": 1566.9273, |
|
"eval_samples_per_second": 65.853, |
|
"eval_steps_per_second": 4.116, |
|
"step": 20971 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00023188405797101448, |
|
"loss": 0.8198, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"eval_accuracy": 0.818135156601779, |
|
"eval_loss": 0.7765618562698364, |
|
"eval_runtime": 1568.3727, |
|
"eval_samples_per_second": 65.792, |
|
"eval_steps_per_second": 4.113, |
|
"step": 21284 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.00022153209109730847, |
|
"loss": 0.8139, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"eval_accuracy": 0.8189553029778005, |
|
"eval_loss": 0.7735697627067566, |
|
"eval_runtime": 1567.9384, |
|
"eval_samples_per_second": 65.811, |
|
"eval_steps_per_second": 4.114, |
|
"step": 21597 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.8195096365262311, |
|
"eval_loss": 0.7705960273742676, |
|
"eval_runtime": 1568.1878, |
|
"eval_samples_per_second": 65.8, |
|
"eval_steps_per_second": 4.113, |
|
"step": 21910 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.00021118012422360248, |
|
"loss": 0.8094, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_accuracy": 0.8200746154831311, |
|
"eval_loss": 0.7673328518867493, |
|
"eval_runtime": 1567.3958, |
|
"eval_samples_per_second": 65.833, |
|
"eval_steps_per_second": 4.115, |
|
"step": 22223 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.00020082815734989647, |
|
"loss": 0.8032, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8205203976743998, |
|
"eval_loss": 0.764415979385376, |
|
"eval_runtime": 1567.2437, |
|
"eval_samples_per_second": 65.84, |
|
"eval_steps_per_second": 4.116, |
|
"step": 22536 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.8215158231256271, |
|
"eval_loss": 0.7607036232948303, |
|
"eval_runtime": 1568.907, |
|
"eval_samples_per_second": 65.77, |
|
"eval_steps_per_second": 4.111, |
|
"step": 22849 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00019047619047619045, |
|
"loss": 0.794, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"eval_accuracy": 0.8218634447758527, |
|
"eval_loss": 0.7578034996986389, |
|
"eval_runtime": 1562.4876, |
|
"eval_samples_per_second": 66.04, |
|
"eval_steps_per_second": 4.128, |
|
"step": 23162 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_accuracy": 0.8225726120979496, |
|
"eval_loss": 0.7539456486701965, |
|
"eval_runtime": 1563.2377, |
|
"eval_samples_per_second": 66.009, |
|
"eval_steps_per_second": 4.126, |
|
"step": 23475 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00018012422360248444, |
|
"loss": 0.7891, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_accuracy": 0.8229123629611099, |
|
"eval_loss": 0.751589298248291, |
|
"eval_runtime": 1562.2621, |
|
"eval_samples_per_second": 66.05, |
|
"eval_steps_per_second": 4.129, |
|
"step": 23788 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00016977225672877845, |
|
"loss": 0.7854, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_accuracy": 0.8234190549813617, |
|
"eval_loss": 0.7492088079452515, |
|
"eval_runtime": 1562.6896, |
|
"eval_samples_per_second": 66.032, |
|
"eval_steps_per_second": 4.127, |
|
"step": 24101 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_accuracy": 0.823868769101777, |
|
"eval_loss": 0.7464490532875061, |
|
"eval_runtime": 1563.2351, |
|
"eval_samples_per_second": 66.009, |
|
"eval_steps_per_second": 4.126, |
|
"step": 24414 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.00015942028985507247, |
|
"loss": 0.7813, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_accuracy": 0.8245509114698386, |
|
"eval_loss": 0.7424126863479614, |
|
"eval_runtime": 1563.6491, |
|
"eval_samples_per_second": 65.991, |
|
"eval_steps_per_second": 4.125, |
|
"step": 24727 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00014906832298136645, |
|
"loss": 0.7757, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_accuracy": 0.8252597131033751, |
|
"eval_loss": 0.7401213049888611, |
|
"eval_runtime": 1563.1841, |
|
"eval_samples_per_second": 66.011, |
|
"eval_steps_per_second": 4.126, |
|
"step": 25040 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_accuracy": 0.8254715067571893, |
|
"eval_loss": 0.7378228306770325, |
|
"eval_runtime": 1563.1126, |
|
"eval_samples_per_second": 66.014, |
|
"eval_steps_per_second": 4.126, |
|
"step": 25353 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.00013871635610766044, |
|
"loss": 0.7722, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"eval_accuracy": 0.8263812114502447, |
|
"eval_loss": 0.734555184841156, |
|
"eval_runtime": 1563.3264, |
|
"eval_samples_per_second": 66.005, |
|
"eval_steps_per_second": 4.126, |
|
"step": 25666 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"eval_accuracy": 0.8266910048907001, |
|
"eval_loss": 0.7316432595252991, |
|
"eval_runtime": 1565.1422, |
|
"eval_samples_per_second": 65.928, |
|
"eval_steps_per_second": 4.121, |
|
"step": 25979 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.00012836438923395443, |
|
"loss": 0.765, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_accuracy": 0.8270383479169559, |
|
"eval_loss": 0.7292919754981995, |
|
"eval_runtime": 1563.9337, |
|
"eval_samples_per_second": 65.979, |
|
"eval_steps_per_second": 4.124, |
|
"step": 26292 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.00011801242236024844, |
|
"loss": 0.7584, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_accuracy": 0.8277969783059648, |
|
"eval_loss": 0.7266284227371216, |
|
"eval_runtime": 1566.4843, |
|
"eval_samples_per_second": 65.872, |
|
"eval_steps_per_second": 4.118, |
|
"step": 26605 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_accuracy": 0.8280860122956746, |
|
"eval_loss": 0.7245298624038696, |
|
"eval_runtime": 1564.0929, |
|
"eval_samples_per_second": 65.972, |
|
"eval_steps_per_second": 4.124, |
|
"step": 26918 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.00010766045548654244, |
|
"loss": 0.7565, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.8286899161017834, |
|
"eval_loss": 0.7209280729293823, |
|
"eval_runtime": 1567.5209, |
|
"eval_samples_per_second": 65.828, |
|
"eval_steps_per_second": 4.115, |
|
"step": 27231 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 9.730848861283643e-05, |
|
"loss": 0.7506, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_accuracy": 0.829094583836786, |
|
"eval_loss": 0.7193953990936279, |
|
"eval_runtime": 1567.7556, |
|
"eval_samples_per_second": 65.818, |
|
"eval_steps_per_second": 4.114, |
|
"step": 27544 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"eval_accuracy": 0.8296025421755568, |
|
"eval_loss": 0.7171857357025146, |
|
"eval_runtime": 1567.618, |
|
"eval_samples_per_second": 65.824, |
|
"eval_steps_per_second": 4.115, |
|
"step": 27857 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 8.695652173913043e-05, |
|
"loss": 0.7469, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_accuracy": 0.8297588118895532, |
|
"eval_loss": 0.715788722038269, |
|
"eval_runtime": 1568.2621, |
|
"eval_samples_per_second": 65.797, |
|
"eval_steps_per_second": 4.113, |
|
"step": 28170 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"eval_accuracy": 0.8303201783553539, |
|
"eval_loss": 0.7129087448120117, |
|
"eval_runtime": 1566.51, |
|
"eval_samples_per_second": 65.871, |
|
"eval_steps_per_second": 4.117, |
|
"step": 28483 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 7.660455486542441e-05, |
|
"loss": 0.7434, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_accuracy": 0.8310723013027939, |
|
"eval_loss": 0.7107537984848022, |
|
"eval_runtime": 1565.7298, |
|
"eval_samples_per_second": 65.903, |
|
"eval_steps_per_second": 4.119, |
|
"step": 28796 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 6.625258799171841e-05, |
|
"loss": 0.7395, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"eval_accuracy": 0.8311976533176414, |
|
"eval_loss": 0.7103267908096313, |
|
"eval_runtime": 1566.6592, |
|
"eval_samples_per_second": 65.864, |
|
"eval_steps_per_second": 4.117, |
|
"step": 29109 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_accuracy": 0.831599738008909, |
|
"eval_loss": 0.7076959609985352, |
|
"eval_runtime": 1563.475, |
|
"eval_samples_per_second": 65.998, |
|
"eval_steps_per_second": 4.125, |
|
"step": 29422 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 5.5900621118012414e-05, |
|
"loss": 0.7327, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_accuracy": 0.8318789165507281, |
|
"eval_loss": 0.7060185074806213, |
|
"eval_runtime": 1563.2277, |
|
"eval_samples_per_second": 66.009, |
|
"eval_steps_per_second": 4.126, |
|
"step": 29735 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 4.554865424430642e-05, |
|
"loss": 0.7294, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_accuracy": 0.8324941188917658, |
|
"eval_loss": 0.7029792070388794, |
|
"eval_runtime": 1563.5398, |
|
"eval_samples_per_second": 65.996, |
|
"eval_steps_per_second": 4.125, |
|
"step": 30048 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_accuracy": 0.832804502850609, |
|
"eval_loss": 0.7012088298797607, |
|
"eval_runtime": 1563.5997, |
|
"eval_samples_per_second": 65.993, |
|
"eval_steps_per_second": 4.125, |
|
"step": 30361 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 3.5196687370600414e-05, |
|
"loss": 0.7277, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_accuracy": 0.8329869074323458, |
|
"eval_loss": 0.7000030875205994, |
|
"eval_runtime": 1563.8346, |
|
"eval_samples_per_second": 65.983, |
|
"eval_steps_per_second": 4.124, |
|
"step": 30674 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_accuracy": 0.8334238360274034, |
|
"eval_loss": 0.6988800764083862, |
|
"eval_runtime": 1563.4183, |
|
"eval_samples_per_second": 66.001, |
|
"eval_steps_per_second": 4.126, |
|
"step": 30987 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 2.4844720496894407e-05, |
|
"loss": 0.7237, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_accuracy": 0.8335258711989345, |
|
"eval_loss": 0.6981679201126099, |
|
"eval_runtime": 1564.2656, |
|
"eval_samples_per_second": 65.965, |
|
"eval_steps_per_second": 4.123, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 1.4492753623188405e-05, |
|
"loss": 0.7233, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"eval_accuracy": 0.8338284368943191, |
|
"eval_loss": 0.6956919431686401, |
|
"eval_runtime": 1565.7206, |
|
"eval_samples_per_second": 65.904, |
|
"eval_steps_per_second": 4.12, |
|
"step": 31613 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"eval_accuracy": 0.8342045312252818, |
|
"eval_loss": 0.6948868632316589, |
|
"eval_runtime": 1564.1555, |
|
"eval_samples_per_second": 65.97, |
|
"eval_steps_per_second": 4.124, |
|
"step": 31926 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 4.140786749482401e-06, |
|
"loss": 0.7196, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 32200, |
|
"total_flos": 2.1701190649823816e+18, |
|
"train_loss": 1.3247782011802152, |
|
"train_runtime": 514233.1245, |
|
"train_samples_per_second": 16.031, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"max_steps": 32200, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.1701190649823816e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|