|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5978367748279254, |
|
"eval_steps": 35, |
|
"global_step": 3250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.4860678577282414e-05, |
|
"loss": 2.6111, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.0262463092803955, |
|
"eval_runtime": 11.4452, |
|
"eval_samples_per_second": 1.835, |
|
"eval_steps_per_second": 0.262, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.471725946566137e-05, |
|
"loss": 2.2099, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.8728946447372437, |
|
"eval_runtime": 11.4415, |
|
"eval_samples_per_second": 1.835, |
|
"eval_steps_per_second": 0.262, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4573840354040322e-05, |
|
"loss": 2.1297, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8009727001190186, |
|
"eval_runtime": 11.4312, |
|
"eval_samples_per_second": 1.837, |
|
"eval_steps_per_second": 0.262, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.4430421242419278e-05, |
|
"loss": 2.0376, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.7509633302688599, |
|
"eval_runtime": 11.4441, |
|
"eval_samples_per_second": 1.835, |
|
"eval_steps_per_second": 0.262, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.428700213079823e-05, |
|
"loss": 2.0077, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.7361119985580444, |
|
"eval_runtime": 11.4061, |
|
"eval_samples_per_second": 1.841, |
|
"eval_steps_per_second": 0.263, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.4143583019177186e-05, |
|
"loss": 1.9427, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.7030919790267944, |
|
"eval_runtime": 11.3698, |
|
"eval_samples_per_second": 1.847, |
|
"eval_steps_per_second": 0.264, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.4000163907556138e-05, |
|
"loss": 1.9224, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.6869089603424072, |
|
"eval_runtime": 11.4842, |
|
"eval_samples_per_second": 1.829, |
|
"eval_steps_per_second": 0.261, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3856744795935094e-05, |
|
"loss": 1.9088, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.6638721227645874, |
|
"eval_runtime": 11.4694, |
|
"eval_samples_per_second": 1.831, |
|
"eval_steps_per_second": 0.262, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.3713325684314046e-05, |
|
"loss": 1.9012, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.6475718021392822, |
|
"eval_runtime": 11.4639, |
|
"eval_samples_per_second": 1.832, |
|
"eval_steps_per_second": 0.262, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.3569906572693e-05, |
|
"loss": 1.8873, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.6309669017791748, |
|
"eval_runtime": 11.4991, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.261, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.3426487461071954e-05, |
|
"loss": 1.881, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.6221837997436523, |
|
"eval_runtime": 11.509, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.261, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.328306834945091e-05, |
|
"loss": 1.8931, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.6102544069290161, |
|
"eval_runtime": 11.52, |
|
"eval_samples_per_second": 1.823, |
|
"eval_steps_per_second": 0.26, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.3139649237829865e-05, |
|
"loss": 1.8524, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.607195496559143, |
|
"eval_runtime": 11.4864, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.299623012620882e-05, |
|
"loss": 1.8498, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.5910367965698242, |
|
"eval_runtime": 11.4927, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.261, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.2852811014587773e-05, |
|
"loss": 1.8194, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.5871880054473877, |
|
"eval_runtime": 11.4668, |
|
"eval_samples_per_second": 1.831, |
|
"eval_steps_per_second": 0.262, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.270939190296673e-05, |
|
"loss": 1.8619, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.577917218208313, |
|
"eval_runtime": 11.4911, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.256597279134568e-05, |
|
"loss": 1.8458, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.569788932800293, |
|
"eval_runtime": 11.4523, |
|
"eval_samples_per_second": 1.834, |
|
"eval_steps_per_second": 0.262, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.2422553679724637e-05, |
|
"loss": 1.7886, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.5579793453216553, |
|
"eval_runtime": 11.4733, |
|
"eval_samples_per_second": 1.83, |
|
"eval_steps_per_second": 0.261, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.227913456810359e-05, |
|
"loss": 1.8302, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.5591094493865967, |
|
"eval_runtime": 11.5195, |
|
"eval_samples_per_second": 1.823, |
|
"eval_steps_per_second": 0.26, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.2135715456482545e-05, |
|
"loss": 1.8173, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.5488135814666748, |
|
"eval_runtime": 11.5232, |
|
"eval_samples_per_second": 1.822, |
|
"eval_steps_per_second": 0.26, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.1992296344861498e-05, |
|
"loss": 1.822, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5554958581924438, |
|
"eval_runtime": 11.4539, |
|
"eval_samples_per_second": 1.833, |
|
"eval_steps_per_second": 0.262, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.1848877233240453e-05, |
|
"loss": 1.7775, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.5420997142791748, |
|
"eval_runtime": 11.4375, |
|
"eval_samples_per_second": 1.836, |
|
"eval_steps_per_second": 0.262, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.1705458121619406e-05, |
|
"loss": 1.8007, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.530941367149353, |
|
"eval_runtime": 11.5016, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.261, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.156203900999836e-05, |
|
"loss": 1.802, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.5383422374725342, |
|
"eval_runtime": 11.502, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.261, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.1418619898377314e-05, |
|
"loss": 1.7921, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.5289701223373413, |
|
"eval_runtime": 11.4801, |
|
"eval_samples_per_second": 1.829, |
|
"eval_steps_per_second": 0.261, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.127520078675627e-05, |
|
"loss": 1.7741, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.5224323272705078, |
|
"eval_runtime": 11.4514, |
|
"eval_samples_per_second": 1.834, |
|
"eval_steps_per_second": 0.262, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.1131781675135225e-05, |
|
"loss": 1.7642, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.5138221979141235, |
|
"eval_runtime": 11.4557, |
|
"eval_samples_per_second": 1.833, |
|
"eval_steps_per_second": 0.262, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.098836256351418e-05, |
|
"loss": 1.7763, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.5038686990737915, |
|
"eval_runtime": 11.491, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.0844943451893133e-05, |
|
"loss": 1.744, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.5031676292419434, |
|
"eval_runtime": 11.541, |
|
"eval_samples_per_second": 1.82, |
|
"eval_steps_per_second": 0.26, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.070152434027209e-05, |
|
"loss": 1.7968, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.507965326309204, |
|
"eval_runtime": 11.5281, |
|
"eval_samples_per_second": 1.822, |
|
"eval_steps_per_second": 0.26, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.055810522865104e-05, |
|
"loss": 1.7192, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.4997100830078125, |
|
"eval_runtime": 11.4603, |
|
"eval_samples_per_second": 1.832, |
|
"eval_steps_per_second": 0.262, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0414686117029997e-05, |
|
"loss": 1.7469, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.4938936233520508, |
|
"eval_runtime": 11.5073, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.027126700540895e-05, |
|
"loss": 1.7566, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.4912477731704712, |
|
"eval_runtime": 11.5141, |
|
"eval_samples_per_second": 1.824, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0127847893787905e-05, |
|
"loss": 1.7395, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.4848188161849976, |
|
"eval_runtime": 11.5052, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9984428782166857e-05, |
|
"loss": 1.7555, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.4838807582855225, |
|
"eval_runtime": 11.4789, |
|
"eval_samples_per_second": 1.829, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9841009670545813e-05, |
|
"loss": 1.7447, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.4835803508758545, |
|
"eval_runtime": 11.5109, |
|
"eval_samples_per_second": 1.824, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9697590558924765e-05, |
|
"loss": 1.7375, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.483397126197815, |
|
"eval_runtime": 11.4887, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.955417144730372e-05, |
|
"loss": 1.7565, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.480878233909607, |
|
"eval_runtime": 11.4732, |
|
"eval_samples_per_second": 1.83, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9410752335682677e-05, |
|
"loss": 1.6992, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.471764087677002, |
|
"eval_runtime": 11.4976, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.926733322406163e-05, |
|
"loss": 1.7588, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.4718772172927856, |
|
"eval_runtime": 11.4494, |
|
"eval_samples_per_second": 1.834, |
|
"eval_steps_per_second": 0.262, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.9123914112440585e-05, |
|
"loss": 1.7459, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.4739229679107666, |
|
"eval_runtime": 11.4772, |
|
"eval_samples_per_second": 1.83, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.898049500081954e-05, |
|
"loss": 1.7496, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.4594241380691528, |
|
"eval_runtime": 11.4774, |
|
"eval_samples_per_second": 1.83, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8837075889198493e-05, |
|
"loss": 1.7217, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.4519307613372803, |
|
"eval_runtime": 11.4991, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8693656777577448e-05, |
|
"loss": 1.7379, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.450444221496582, |
|
"eval_runtime": 11.4875, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.85502376659564e-05, |
|
"loss": 1.7178, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.4498975276947021, |
|
"eval_runtime": 11.4834, |
|
"eval_samples_per_second": 1.829, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8406818554335356e-05, |
|
"loss": 1.726, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.4456058740615845, |
|
"eval_runtime": 11.4896, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.826339944271431e-05, |
|
"loss": 1.6916, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.4492512941360474, |
|
"eval_runtime": 11.4853, |
|
"eval_samples_per_second": 1.828, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.8119980331093264e-05, |
|
"loss": 1.7388, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.4481810331344604, |
|
"eval_runtime": 11.5015, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7976561219472217e-05, |
|
"loss": 1.7026, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.4463753700256348, |
|
"eval_runtime": 11.4964, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.7833142107851172e-05, |
|
"loss": 1.7025, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.4447450637817383, |
|
"eval_runtime": 11.5592, |
|
"eval_samples_per_second": 1.817, |
|
"eval_steps_per_second": 0.26, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.7689722996230128e-05, |
|
"loss": 1.7497, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.442565679550171, |
|
"eval_runtime": 11.4749, |
|
"eval_samples_per_second": 1.83, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.754630388460908e-05, |
|
"loss": 1.7545, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.4413307905197144, |
|
"eval_runtime": 11.4848, |
|
"eval_samples_per_second": 1.829, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7402884772988036e-05, |
|
"loss": 1.6951, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.440474033355713, |
|
"eval_runtime": 11.5878, |
|
"eval_samples_per_second": 1.812, |
|
"eval_steps_per_second": 0.259, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.725946566136699e-05, |
|
"loss": 1.7432, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.4366823434829712, |
|
"eval_runtime": 11.5531, |
|
"eval_samples_per_second": 1.818, |
|
"eval_steps_per_second": 0.26, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.7116046549745944e-05, |
|
"loss": 1.7153, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.4368674755096436, |
|
"eval_runtime": 11.4927, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.69726274381249e-05, |
|
"loss": 1.732, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.4359997510910034, |
|
"eval_runtime": 11.4761, |
|
"eval_samples_per_second": 1.83, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.6829208326503852e-05, |
|
"loss": 1.7143, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.437907338142395, |
|
"eval_runtime": 11.5143, |
|
"eval_samples_per_second": 1.824, |
|
"eval_steps_per_second": 0.261, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6685789214882808e-05, |
|
"loss": 1.7072, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.4374998807907104, |
|
"eval_runtime": 11.4633, |
|
"eval_samples_per_second": 1.832, |
|
"eval_steps_per_second": 0.262, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.654237010326176e-05, |
|
"loss": 1.591, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.4363301992416382, |
|
"eval_runtime": 11.4952, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.261, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.6398950991640716e-05, |
|
"loss": 1.549, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 1.4447616338729858, |
|
"eval_runtime": 11.4847, |
|
"eval_samples_per_second": 1.829, |
|
"eval_steps_per_second": 0.261, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.6255531880019668e-05, |
|
"loss": 1.5636, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 1.4376814365386963, |
|
"eval_runtime": 11.5477, |
|
"eval_samples_per_second": 1.819, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.6112112768398624e-05, |
|
"loss": 1.5629, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.4429987668991089, |
|
"eval_runtime": 11.5258, |
|
"eval_samples_per_second": 1.822, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.596869365677758e-05, |
|
"loss": 1.5539, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.4357415437698364, |
|
"eval_runtime": 11.5216, |
|
"eval_samples_per_second": 1.823, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.5825274545156532e-05, |
|
"loss": 1.574, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 1.4316208362579346, |
|
"eval_runtime": 11.5068, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.261, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.5681855433535488e-05, |
|
"loss": 1.5976, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 1.4350919723510742, |
|
"eval_runtime": 11.554, |
|
"eval_samples_per_second": 1.818, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.553843632191444e-05, |
|
"loss": 1.6087, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 1.4374239444732666, |
|
"eval_runtime": 11.5273, |
|
"eval_samples_per_second": 1.822, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.5395017210293396e-05, |
|
"loss": 1.5684, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 1.4325898885726929, |
|
"eval_runtime": 11.558, |
|
"eval_samples_per_second": 1.817, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.5251598098672348e-05, |
|
"loss": 1.5858, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.437401294708252, |
|
"eval_runtime": 11.542, |
|
"eval_samples_per_second": 1.819, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.5108178987051302e-05, |
|
"loss": 1.5768, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 1.437371015548706, |
|
"eval_runtime": 11.5127, |
|
"eval_samples_per_second": 1.824, |
|
"eval_steps_per_second": 0.261, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.496475987543026e-05, |
|
"loss": 1.5719, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.4342734813690186, |
|
"eval_runtime": 11.5878, |
|
"eval_samples_per_second": 1.812, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.4821340763809213e-05, |
|
"loss": 1.5661, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 1.4325451850891113, |
|
"eval_runtime": 11.5679, |
|
"eval_samples_per_second": 1.815, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.4677921652188167e-05, |
|
"loss": 1.571, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.4307782649993896, |
|
"eval_runtime": 11.5174, |
|
"eval_samples_per_second": 1.823, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4534502540567121e-05, |
|
"loss": 1.54, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 1.4300092458724976, |
|
"eval_runtime": 11.5587, |
|
"eval_samples_per_second": 1.817, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.4391083428946075e-05, |
|
"loss": 1.5275, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 1.4285993576049805, |
|
"eval_runtime": 11.5338, |
|
"eval_samples_per_second": 1.821, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.424766431732503e-05, |
|
"loss": 1.5837, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 1.4346880912780762, |
|
"eval_runtime": 11.5585, |
|
"eval_samples_per_second": 1.817, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.4104245205703983e-05, |
|
"loss": 1.571, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.4264836311340332, |
|
"eval_runtime": 11.5547, |
|
"eval_samples_per_second": 1.817, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.3960826094082937e-05, |
|
"loss": 1.5538, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 1.424988865852356, |
|
"eval_runtime": 11.5078, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.261, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.3817406982461891e-05, |
|
"loss": 1.5634, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 1.4214783906936646, |
|
"eval_runtime": 11.5523, |
|
"eval_samples_per_second": 1.818, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.3673987870840845e-05, |
|
"loss": 1.5762, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 1.417558193206787, |
|
"eval_runtime": 11.5881, |
|
"eval_samples_per_second": 1.812, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.35305687592198e-05, |
|
"loss": 1.5574, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.4196423292160034, |
|
"eval_runtime": 11.5684, |
|
"eval_samples_per_second": 1.815, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.3387149647598754e-05, |
|
"loss": 1.5783, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 1.42311429977417, |
|
"eval_runtime": 11.5665, |
|
"eval_samples_per_second": 1.816, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.3243730535977708e-05, |
|
"loss": 1.576, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 1.4219133853912354, |
|
"eval_runtime": 11.5466, |
|
"eval_samples_per_second": 1.819, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.3100311424356662e-05, |
|
"loss": 1.6013, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 1.4164221286773682, |
|
"eval_runtime": 11.5232, |
|
"eval_samples_per_second": 1.822, |
|
"eval_steps_per_second": 0.26, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.2956892312735619e-05, |
|
"loss": 1.5824, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 1.4148584604263306, |
|
"eval_runtime": 11.494, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.261, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.2813473201114573e-05, |
|
"loss": 1.5954, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.4142621755599976, |
|
"eval_runtime": 11.564, |
|
"eval_samples_per_second": 1.816, |
|
"eval_steps_per_second": 0.259, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.2670054089493527e-05, |
|
"loss": 1.5621, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.4165393114089966, |
|
"eval_runtime": 11.5455, |
|
"eval_samples_per_second": 1.819, |
|
"eval_steps_per_second": 0.26, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.2526634977872481e-05, |
|
"loss": 1.5877, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 1.41389000415802, |
|
"eval_runtime": 11.5632, |
|
"eval_samples_per_second": 1.816, |
|
"eval_steps_per_second": 0.259, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.2383215866251435e-05, |
|
"loss": 1.5843, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 1.409727931022644, |
|
"eval_runtime": 11.5302, |
|
"eval_samples_per_second": 1.821, |
|
"eval_steps_per_second": 0.26, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.2239796754630389e-05, |
|
"loss": 1.5255, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.419966697692871, |
|
"eval_runtime": 11.5457, |
|
"eval_samples_per_second": 1.819, |
|
"eval_steps_per_second": 0.26, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.2096377643009343e-05, |
|
"loss": 1.55, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 1.4153906106948853, |
|
"eval_runtime": 11.5656, |
|
"eval_samples_per_second": 1.816, |
|
"eval_steps_per_second": 0.259, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.1952958531388297e-05, |
|
"loss": 1.5519, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.4161474704742432, |
|
"eval_runtime": 11.5425, |
|
"eval_samples_per_second": 1.819, |
|
"eval_steps_per_second": 0.26, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.1809539419767251e-05, |
|
"loss": 1.5273, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 1.416408658027649, |
|
"eval_runtime": 11.5376, |
|
"eval_samples_per_second": 1.82, |
|
"eval_steps_per_second": 0.26, |
|
"step": 3220 |
|
} |
|
], |
|
"logging_steps": 35, |
|
"max_steps": 6102, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 250, |
|
"total_flos": 1.093574423789568e+18, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|