{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5978367748279254, "eval_steps": 35, "global_step": 3250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 2.4860678577282414e-05, "loss": 2.6111, "step": 35 }, { "epoch": 0.02, "eval_loss": 2.0262463092803955, "eval_runtime": 11.4452, "eval_samples_per_second": 1.835, "eval_steps_per_second": 0.262, "step": 35 }, { "epoch": 0.03, "learning_rate": 2.471725946566137e-05, "loss": 2.2099, "step": 70 }, { "epoch": 0.03, "eval_loss": 1.8728946447372437, "eval_runtime": 11.4415, "eval_samples_per_second": 1.835, "eval_steps_per_second": 0.262, "step": 70 }, { "epoch": 0.05, "learning_rate": 2.4573840354040322e-05, "loss": 2.1297, "step": 105 }, { "epoch": 0.05, "eval_loss": 1.8009727001190186, "eval_runtime": 11.4312, "eval_samples_per_second": 1.837, "eval_steps_per_second": 0.262, "step": 105 }, { "epoch": 0.07, "learning_rate": 2.4430421242419278e-05, "loss": 2.0376, "step": 140 }, { "epoch": 0.07, "eval_loss": 1.7509633302688599, "eval_runtime": 11.4441, "eval_samples_per_second": 1.835, "eval_steps_per_second": 0.262, "step": 140 }, { "epoch": 0.09, "learning_rate": 2.428700213079823e-05, "loss": 2.0077, "step": 175 }, { "epoch": 0.09, "eval_loss": 1.7361119985580444, "eval_runtime": 11.4061, "eval_samples_per_second": 1.841, "eval_steps_per_second": 0.263, "step": 175 }, { "epoch": 0.1, "learning_rate": 2.4143583019177186e-05, "loss": 1.9427, "step": 210 }, { "epoch": 0.1, "eval_loss": 1.7030919790267944, "eval_runtime": 11.3698, "eval_samples_per_second": 1.847, "eval_steps_per_second": 0.264, "step": 210 }, { "epoch": 0.12, "learning_rate": 2.4000163907556138e-05, "loss": 1.9224, "step": 245 }, { "epoch": 0.12, "eval_loss": 1.6869089603424072, "eval_runtime": 11.4842, "eval_samples_per_second": 1.829, "eval_steps_per_second": 0.261, "step": 245 }, { "epoch": 0.14, "learning_rate": 2.3856744795935094e-05, "loss": 1.9088, "step": 280 }, { "epoch": 0.14, "eval_loss": 1.6638721227645874, "eval_runtime": 11.4694, "eval_samples_per_second": 1.831, "eval_steps_per_second": 0.262, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.3713325684314046e-05, "loss": 1.9012, "step": 315 }, { "epoch": 0.15, "eval_loss": 1.6475718021392822, "eval_runtime": 11.4639, "eval_samples_per_second": 1.832, "eval_steps_per_second": 0.262, "step": 315 }, { "epoch": 0.17, "learning_rate": 2.3569906572693e-05, "loss": 1.8873, "step": 350 }, { "epoch": 0.17, "eval_loss": 1.6309669017791748, "eval_runtime": 11.4991, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.261, "step": 350 }, { "epoch": 0.19, "learning_rate": 2.3426487461071954e-05, "loss": 1.881, "step": 385 }, { "epoch": 0.19, "eval_loss": 1.6221837997436523, "eval_runtime": 11.509, "eval_samples_per_second": 1.825, "eval_steps_per_second": 0.261, "step": 385 }, { "epoch": 0.21, "learning_rate": 2.328306834945091e-05, "loss": 1.8931, "step": 420 }, { "epoch": 0.21, "eval_loss": 1.6102544069290161, "eval_runtime": 11.52, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.26, "step": 420 }, { "epoch": 0.22, "learning_rate": 2.3139649237829865e-05, "loss": 1.8524, "step": 455 }, { "epoch": 0.22, "eval_loss": 1.607195496559143, "eval_runtime": 11.4864, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 455 }, { "epoch": 0.24, "learning_rate": 2.299623012620882e-05, "loss": 1.8498, "step": 490 }, { "epoch": 0.24, "eval_loss": 1.5910367965698242, "eval_runtime": 11.4927, "eval_samples_per_second": 1.827, "eval_steps_per_second": 0.261, "step": 490 }, { "epoch": 0.26, "learning_rate": 2.2852811014587773e-05, "loss": 1.8194, "step": 525 }, { "epoch": 0.26, "eval_loss": 1.5871880054473877, "eval_runtime": 11.4668, "eval_samples_per_second": 1.831, "eval_steps_per_second": 0.262, "step": 525 }, { "epoch": 0.28, "learning_rate": 2.270939190296673e-05, "loss": 1.8619, "step": 560 }, { "epoch": 0.28, "eval_loss": 1.577917218208313, "eval_runtime": 11.4911, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 560 }, { "epoch": 0.29, "learning_rate": 2.256597279134568e-05, "loss": 1.8458, "step": 595 }, { "epoch": 0.29, "eval_loss": 1.569788932800293, "eval_runtime": 11.4523, "eval_samples_per_second": 1.834, "eval_steps_per_second": 0.262, "step": 595 }, { "epoch": 0.31, "learning_rate": 2.2422553679724637e-05, "loss": 1.7886, "step": 630 }, { "epoch": 0.31, "eval_loss": 1.5579793453216553, "eval_runtime": 11.4733, "eval_samples_per_second": 1.83, "eval_steps_per_second": 0.261, "step": 630 }, { "epoch": 0.33, "learning_rate": 2.227913456810359e-05, "loss": 1.8302, "step": 665 }, { "epoch": 0.33, "eval_loss": 1.5591094493865967, "eval_runtime": 11.5195, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.26, "step": 665 }, { "epoch": 0.34, "learning_rate": 2.2135715456482545e-05, "loss": 1.8173, "step": 700 }, { "epoch": 0.34, "eval_loss": 1.5488135814666748, "eval_runtime": 11.5232, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.26, "step": 700 }, { "epoch": 0.36, "learning_rate": 2.1992296344861498e-05, "loss": 1.822, "step": 735 }, { "epoch": 0.36, "eval_loss": 1.5554958581924438, "eval_runtime": 11.4539, "eval_samples_per_second": 1.833, "eval_steps_per_second": 0.262, "step": 735 }, { "epoch": 0.38, "learning_rate": 2.1848877233240453e-05, "loss": 1.7775, "step": 770 }, { "epoch": 0.38, "eval_loss": 1.5420997142791748, "eval_runtime": 11.4375, "eval_samples_per_second": 1.836, "eval_steps_per_second": 0.262, "step": 770 }, { "epoch": 0.4, "learning_rate": 2.1705458121619406e-05, "loss": 1.8007, "step": 805 }, { "epoch": 0.4, "eval_loss": 1.530941367149353, "eval_runtime": 11.5016, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.261, "step": 805 }, { "epoch": 0.41, "learning_rate": 2.156203900999836e-05, "loss": 1.802, "step": 840 }, { "epoch": 0.41, "eval_loss": 1.5383422374725342, "eval_runtime": 11.502, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.261, "step": 840 }, { "epoch": 0.43, "learning_rate": 2.1418619898377314e-05, "loss": 1.7921, "step": 875 }, { "epoch": 0.43, "eval_loss": 1.5289701223373413, "eval_runtime": 11.4801, "eval_samples_per_second": 1.829, "eval_steps_per_second": 0.261, "step": 875 }, { "epoch": 0.45, "learning_rate": 2.127520078675627e-05, "loss": 1.7741, "step": 910 }, { "epoch": 0.45, "eval_loss": 1.5224323272705078, "eval_runtime": 11.4514, "eval_samples_per_second": 1.834, "eval_steps_per_second": 0.262, "step": 910 }, { "epoch": 0.46, "learning_rate": 2.1131781675135225e-05, "loss": 1.7642, "step": 945 }, { "epoch": 0.46, "eval_loss": 1.5138221979141235, "eval_runtime": 11.4557, "eval_samples_per_second": 1.833, "eval_steps_per_second": 0.262, "step": 945 }, { "epoch": 0.48, "learning_rate": 2.098836256351418e-05, "loss": 1.7763, "step": 980 }, { "epoch": 0.48, "eval_loss": 1.5038686990737915, "eval_runtime": 11.491, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 980 }, { "epoch": 0.5, "learning_rate": 2.0844943451893133e-05, "loss": 1.744, "step": 1015 }, { "epoch": 0.5, "eval_loss": 1.5031676292419434, "eval_runtime": 11.541, "eval_samples_per_second": 1.82, "eval_steps_per_second": 0.26, "step": 1015 }, { "epoch": 0.52, "learning_rate": 2.070152434027209e-05, "loss": 1.7968, "step": 1050 }, { "epoch": 0.52, "eval_loss": 1.507965326309204, "eval_runtime": 11.5281, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.26, "step": 1050 }, { "epoch": 0.53, "learning_rate": 2.055810522865104e-05, "loss": 1.7192, "step": 1085 }, { "epoch": 0.53, "eval_loss": 1.4997100830078125, "eval_runtime": 11.4603, "eval_samples_per_second": 1.832, "eval_steps_per_second": 0.262, "step": 1085 }, { "epoch": 0.55, "learning_rate": 2.0414686117029997e-05, "loss": 1.7469, "step": 1120 }, { "epoch": 0.55, "eval_loss": 1.4938936233520508, "eval_runtime": 11.5073, "eval_samples_per_second": 1.825, "eval_steps_per_second": 0.261, "step": 1120 }, { "epoch": 0.57, "learning_rate": 2.027126700540895e-05, "loss": 1.7566, "step": 1155 }, { "epoch": 0.57, "eval_loss": 1.4912477731704712, "eval_runtime": 11.5141, "eval_samples_per_second": 1.824, "eval_steps_per_second": 0.261, "step": 1155 }, { "epoch": 0.59, "learning_rate": 2.0127847893787905e-05, "loss": 1.7395, "step": 1190 }, { "epoch": 0.59, "eval_loss": 1.4848188161849976, "eval_runtime": 11.5052, "eval_samples_per_second": 1.825, "eval_steps_per_second": 0.261, "step": 1190 }, { "epoch": 0.6, "learning_rate": 1.9984428782166857e-05, "loss": 1.7555, "step": 1225 }, { "epoch": 0.6, "eval_loss": 1.4838807582855225, "eval_runtime": 11.4789, "eval_samples_per_second": 1.829, "eval_steps_per_second": 0.261, "step": 1225 }, { "epoch": 0.62, "learning_rate": 1.9841009670545813e-05, "loss": 1.7447, "step": 1260 }, { "epoch": 0.62, "eval_loss": 1.4835803508758545, "eval_runtime": 11.5109, "eval_samples_per_second": 1.824, "eval_steps_per_second": 0.261, "step": 1260 }, { "epoch": 0.64, "learning_rate": 1.9697590558924765e-05, "loss": 1.7375, "step": 1295 }, { "epoch": 0.64, "eval_loss": 1.483397126197815, "eval_runtime": 11.4887, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 1295 }, { "epoch": 0.65, "learning_rate": 1.955417144730372e-05, "loss": 1.7565, "step": 1330 }, { "epoch": 0.65, "eval_loss": 1.480878233909607, "eval_runtime": 11.4732, "eval_samples_per_second": 1.83, "eval_steps_per_second": 0.261, "step": 1330 }, { "epoch": 0.67, "learning_rate": 1.9410752335682677e-05, "loss": 1.6992, "step": 1365 }, { "epoch": 0.67, "eval_loss": 1.471764087677002, "eval_runtime": 11.4976, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.261, "step": 1365 }, { "epoch": 0.69, "learning_rate": 1.926733322406163e-05, "loss": 1.7588, "step": 1400 }, { "epoch": 0.69, "eval_loss": 1.4718772172927856, "eval_runtime": 11.4494, "eval_samples_per_second": 1.834, "eval_steps_per_second": 0.262, "step": 1400 }, { "epoch": 0.71, "learning_rate": 1.9123914112440585e-05, "loss": 1.7459, "step": 1435 }, { "epoch": 0.71, "eval_loss": 1.4739229679107666, "eval_runtime": 11.4772, "eval_samples_per_second": 1.83, "eval_steps_per_second": 0.261, "step": 1435 }, { "epoch": 0.72, "learning_rate": 1.898049500081954e-05, "loss": 1.7496, "step": 1470 }, { "epoch": 0.72, "eval_loss": 1.4594241380691528, "eval_runtime": 11.4774, "eval_samples_per_second": 1.83, "eval_steps_per_second": 0.261, "step": 1470 }, { "epoch": 0.74, "learning_rate": 1.8837075889198493e-05, "loss": 1.7217, "step": 1505 }, { "epoch": 0.74, "eval_loss": 1.4519307613372803, "eval_runtime": 11.4991, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.261, "step": 1505 }, { "epoch": 0.76, "learning_rate": 1.8693656777577448e-05, "loss": 1.7379, "step": 1540 }, { "epoch": 0.76, "eval_loss": 1.450444221496582, "eval_runtime": 11.4875, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 1540 }, { "epoch": 0.77, "learning_rate": 1.85502376659564e-05, "loss": 1.7178, "step": 1575 }, { "epoch": 0.77, "eval_loss": 1.4498975276947021, "eval_runtime": 11.4834, "eval_samples_per_second": 1.829, "eval_steps_per_second": 0.261, "step": 1575 }, { "epoch": 0.79, "learning_rate": 1.8406818554335356e-05, "loss": 1.726, "step": 1610 }, { "epoch": 0.79, "eval_loss": 1.4456058740615845, "eval_runtime": 11.4896, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 1610 }, { "epoch": 0.81, "learning_rate": 1.826339944271431e-05, "loss": 1.6916, "step": 1645 }, { "epoch": 0.81, "eval_loss": 1.4492512941360474, "eval_runtime": 11.4853, "eval_samples_per_second": 1.828, "eval_steps_per_second": 0.261, "step": 1645 }, { "epoch": 0.83, "learning_rate": 1.8119980331093264e-05, "loss": 1.7388, "step": 1680 }, { "epoch": 0.83, "eval_loss": 1.4481810331344604, "eval_runtime": 11.5015, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.261, "step": 1680 }, { "epoch": 0.84, "learning_rate": 1.7976561219472217e-05, "loss": 1.7026, "step": 1715 }, { "epoch": 0.84, "eval_loss": 1.4463753700256348, "eval_runtime": 11.4964, "eval_samples_per_second": 1.827, "eval_steps_per_second": 0.261, "step": 1715 }, { "epoch": 0.86, "learning_rate": 1.7833142107851172e-05, "loss": 1.7025, "step": 1750 }, { "epoch": 0.86, "eval_loss": 1.4447450637817383, "eval_runtime": 11.5592, "eval_samples_per_second": 1.817, "eval_steps_per_second": 0.26, "step": 1750 }, { "epoch": 0.88, "learning_rate": 1.7689722996230128e-05, "loss": 1.7497, "step": 1785 }, { "epoch": 0.88, "eval_loss": 1.442565679550171, "eval_runtime": 11.4749, "eval_samples_per_second": 1.83, "eval_steps_per_second": 0.261, "step": 1785 }, { "epoch": 0.89, "learning_rate": 1.754630388460908e-05, "loss": 1.7545, "step": 1820 }, { "epoch": 0.89, "eval_loss": 1.4413307905197144, "eval_runtime": 11.4848, "eval_samples_per_second": 1.829, "eval_steps_per_second": 0.261, "step": 1820 }, { "epoch": 0.91, "learning_rate": 1.7402884772988036e-05, "loss": 1.6951, "step": 1855 }, { "epoch": 0.91, "eval_loss": 1.440474033355713, "eval_runtime": 11.5878, "eval_samples_per_second": 1.812, "eval_steps_per_second": 0.259, "step": 1855 }, { "epoch": 0.93, "learning_rate": 1.725946566136699e-05, "loss": 1.7432, "step": 1890 }, { "epoch": 0.93, "eval_loss": 1.4366823434829712, "eval_runtime": 11.5531, "eval_samples_per_second": 1.818, "eval_steps_per_second": 0.26, "step": 1890 }, { "epoch": 0.95, "learning_rate": 1.7116046549745944e-05, "loss": 1.7153, "step": 1925 }, { "epoch": 0.95, "eval_loss": 1.4368674755096436, "eval_runtime": 11.4927, "eval_samples_per_second": 1.827, "eval_steps_per_second": 0.261, "step": 1925 }, { "epoch": 0.96, "learning_rate": 1.69726274381249e-05, "loss": 1.732, "step": 1960 }, { "epoch": 0.96, "eval_loss": 1.4359997510910034, "eval_runtime": 11.4761, "eval_samples_per_second": 1.83, "eval_steps_per_second": 0.261, "step": 1960 }, { "epoch": 0.98, "learning_rate": 1.6829208326503852e-05, "loss": 1.7143, "step": 1995 }, { "epoch": 0.98, "eval_loss": 1.437907338142395, "eval_runtime": 11.5143, "eval_samples_per_second": 1.824, "eval_steps_per_second": 0.261, "step": 1995 }, { "epoch": 1.0, "learning_rate": 1.6685789214882808e-05, "loss": 1.7072, "step": 2030 }, { "epoch": 1.0, "eval_loss": 1.4374998807907104, "eval_runtime": 11.4633, "eval_samples_per_second": 1.832, "eval_steps_per_second": 0.262, "step": 2030 }, { "epoch": 1.02, "learning_rate": 1.654237010326176e-05, "loss": 1.591, "step": 2065 }, { "epoch": 1.02, "eval_loss": 1.4363301992416382, "eval_runtime": 11.4952, "eval_samples_per_second": 1.827, "eval_steps_per_second": 0.261, "step": 2065 }, { "epoch": 1.03, "learning_rate": 1.6398950991640716e-05, "loss": 1.549, "step": 2100 }, { "epoch": 1.03, "eval_loss": 1.4447616338729858, "eval_runtime": 11.4847, "eval_samples_per_second": 1.829, "eval_steps_per_second": 0.261, "step": 2100 }, { "epoch": 1.05, "learning_rate": 1.6255531880019668e-05, "loss": 1.5636, "step": 2135 }, { "epoch": 1.05, "eval_loss": 1.4376814365386963, "eval_runtime": 11.5477, "eval_samples_per_second": 1.819, "eval_steps_per_second": 0.26, "step": 2135 }, { "epoch": 1.07, "learning_rate": 1.6112112768398624e-05, "loss": 1.5629, "step": 2170 }, { "epoch": 1.07, "eval_loss": 1.4429987668991089, "eval_runtime": 11.5258, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.26, "step": 2170 }, { "epoch": 1.08, "learning_rate": 1.596869365677758e-05, "loss": 1.5539, "step": 2205 }, { "epoch": 1.08, "eval_loss": 1.4357415437698364, "eval_runtime": 11.5216, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.26, "step": 2205 }, { "epoch": 1.1, "learning_rate": 1.5825274545156532e-05, "loss": 1.574, "step": 2240 }, { "epoch": 1.1, "eval_loss": 1.4316208362579346, "eval_runtime": 11.5068, "eval_samples_per_second": 1.825, "eval_steps_per_second": 0.261, "step": 2240 }, { "epoch": 1.12, "learning_rate": 1.5681855433535488e-05, "loss": 1.5976, "step": 2275 }, { "epoch": 1.12, "eval_loss": 1.4350919723510742, "eval_runtime": 11.554, "eval_samples_per_second": 1.818, "eval_steps_per_second": 0.26, "step": 2275 }, { "epoch": 1.14, "learning_rate": 1.553843632191444e-05, "loss": 1.6087, "step": 2310 }, { "epoch": 1.14, "eval_loss": 1.4374239444732666, "eval_runtime": 11.5273, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.26, "step": 2310 }, { "epoch": 1.15, "learning_rate": 1.5395017210293396e-05, "loss": 1.5684, "step": 2345 }, { "epoch": 1.15, "eval_loss": 1.4325898885726929, "eval_runtime": 11.558, "eval_samples_per_second": 1.817, "eval_steps_per_second": 0.26, "step": 2345 }, { "epoch": 1.17, "learning_rate": 1.5251598098672348e-05, "loss": 1.5858, "step": 2380 }, { "epoch": 1.17, "eval_loss": 1.437401294708252, "eval_runtime": 11.542, "eval_samples_per_second": 1.819, "eval_steps_per_second": 0.26, "step": 2380 }, { "epoch": 1.19, "learning_rate": 1.5108178987051302e-05, "loss": 1.5768, "step": 2415 }, { "epoch": 1.19, "eval_loss": 1.437371015548706, "eval_runtime": 11.5127, "eval_samples_per_second": 1.824, "eval_steps_per_second": 0.261, "step": 2415 }, { "epoch": 1.2, "learning_rate": 1.496475987543026e-05, "loss": 1.5719, "step": 2450 }, { "epoch": 1.2, "eval_loss": 1.4342734813690186, "eval_runtime": 11.5878, "eval_samples_per_second": 1.812, "eval_steps_per_second": 0.259, "step": 2450 }, { "epoch": 1.22, "learning_rate": 1.4821340763809213e-05, "loss": 1.5661, "step": 2485 }, { "epoch": 1.22, "eval_loss": 1.4325451850891113, "eval_runtime": 11.5679, "eval_samples_per_second": 1.815, "eval_steps_per_second": 0.259, "step": 2485 }, { "epoch": 1.24, "learning_rate": 1.4677921652188167e-05, "loss": 1.571, "step": 2520 }, { "epoch": 1.24, "eval_loss": 1.4307782649993896, "eval_runtime": 11.5174, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.26, "step": 2520 }, { "epoch": 1.26, "learning_rate": 1.4534502540567121e-05, "loss": 1.54, "step": 2555 }, { "epoch": 1.26, "eval_loss": 1.4300092458724976, "eval_runtime": 11.5587, "eval_samples_per_second": 1.817, "eval_steps_per_second": 0.26, "step": 2555 }, { "epoch": 1.27, "learning_rate": 1.4391083428946075e-05, "loss": 1.5275, "step": 2590 }, { "epoch": 1.27, "eval_loss": 1.4285993576049805, "eval_runtime": 11.5338, "eval_samples_per_second": 1.821, "eval_steps_per_second": 0.26, "step": 2590 }, { "epoch": 1.29, "learning_rate": 1.424766431732503e-05, "loss": 1.5837, "step": 2625 }, { "epoch": 1.29, "eval_loss": 1.4346880912780762, "eval_runtime": 11.5585, "eval_samples_per_second": 1.817, "eval_steps_per_second": 0.26, "step": 2625 }, { "epoch": 1.31, "learning_rate": 1.4104245205703983e-05, "loss": 1.571, "step": 2660 }, { "epoch": 1.31, "eval_loss": 1.4264836311340332, "eval_runtime": 11.5547, "eval_samples_per_second": 1.817, "eval_steps_per_second": 0.26, "step": 2660 }, { "epoch": 1.32, "learning_rate": 1.3960826094082937e-05, "loss": 1.5538, "step": 2695 }, { "epoch": 1.32, "eval_loss": 1.424988865852356, "eval_runtime": 11.5078, "eval_samples_per_second": 1.825, "eval_steps_per_second": 0.261, "step": 2695 }, { "epoch": 1.34, "learning_rate": 1.3817406982461891e-05, "loss": 1.5634, "step": 2730 }, { "epoch": 1.34, "eval_loss": 1.4214783906936646, "eval_runtime": 11.5523, "eval_samples_per_second": 1.818, "eval_steps_per_second": 0.26, "step": 2730 }, { "epoch": 1.36, "learning_rate": 1.3673987870840845e-05, "loss": 1.5762, "step": 2765 }, { "epoch": 1.36, "eval_loss": 1.417558193206787, "eval_runtime": 11.5881, "eval_samples_per_second": 1.812, "eval_steps_per_second": 0.259, "step": 2765 }, { "epoch": 1.38, "learning_rate": 1.35305687592198e-05, "loss": 1.5574, "step": 2800 }, { "epoch": 1.38, "eval_loss": 1.4196423292160034, "eval_runtime": 11.5684, "eval_samples_per_second": 1.815, "eval_steps_per_second": 0.259, "step": 2800 }, { "epoch": 1.39, "learning_rate": 1.3387149647598754e-05, "loss": 1.5783, "step": 2835 }, { "epoch": 1.39, "eval_loss": 1.42311429977417, "eval_runtime": 11.5665, "eval_samples_per_second": 1.816, "eval_steps_per_second": 0.259, "step": 2835 }, { "epoch": 1.41, "learning_rate": 1.3243730535977708e-05, "loss": 1.576, "step": 2870 }, { "epoch": 1.41, "eval_loss": 1.4219133853912354, "eval_runtime": 11.5466, "eval_samples_per_second": 1.819, "eval_steps_per_second": 0.26, "step": 2870 }, { "epoch": 1.43, "learning_rate": 1.3100311424356662e-05, "loss": 1.6013, "step": 2905 }, { "epoch": 1.43, "eval_loss": 1.4164221286773682, "eval_runtime": 11.5232, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.26, "step": 2905 }, { "epoch": 1.45, "learning_rate": 1.2956892312735619e-05, "loss": 1.5824, "step": 2940 }, { "epoch": 1.45, "eval_loss": 1.4148584604263306, "eval_runtime": 11.494, "eval_samples_per_second": 1.827, "eval_steps_per_second": 0.261, "step": 2940 }, { "epoch": 1.46, "learning_rate": 1.2813473201114573e-05, "loss": 1.5954, "step": 2975 }, { "epoch": 1.46, "eval_loss": 1.4142621755599976, "eval_runtime": 11.564, "eval_samples_per_second": 1.816, "eval_steps_per_second": 0.259, "step": 2975 }, { "epoch": 1.48, "learning_rate": 1.2670054089493527e-05, "loss": 1.5621, "step": 3010 }, { "epoch": 1.48, "eval_loss": 1.4165393114089966, "eval_runtime": 11.5455, "eval_samples_per_second": 1.819, "eval_steps_per_second": 0.26, "step": 3010 }, { "epoch": 1.5, "learning_rate": 1.2526634977872481e-05, "loss": 1.5877, "step": 3045 }, { "epoch": 1.5, "eval_loss": 1.41389000415802, "eval_runtime": 11.5632, "eval_samples_per_second": 1.816, "eval_steps_per_second": 0.259, "step": 3045 }, { "epoch": 1.51, "learning_rate": 1.2383215866251435e-05, "loss": 1.5843, "step": 3080 }, { "epoch": 1.51, "eval_loss": 1.409727931022644, "eval_runtime": 11.5302, "eval_samples_per_second": 1.821, "eval_steps_per_second": 0.26, "step": 3080 }, { "epoch": 1.53, "learning_rate": 1.2239796754630389e-05, "loss": 1.5255, "step": 3115 }, { "epoch": 1.53, "eval_loss": 1.419966697692871, "eval_runtime": 11.5457, "eval_samples_per_second": 1.819, "eval_steps_per_second": 0.26, "step": 3115 }, { "epoch": 1.55, "learning_rate": 1.2096377643009343e-05, "loss": 1.55, "step": 3150 }, { "epoch": 1.55, "eval_loss": 1.4153906106948853, "eval_runtime": 11.5656, "eval_samples_per_second": 1.816, "eval_steps_per_second": 0.259, "step": 3150 }, { "epoch": 1.57, "learning_rate": 1.1952958531388297e-05, "loss": 1.5519, "step": 3185 }, { "epoch": 1.57, "eval_loss": 1.4161474704742432, "eval_runtime": 11.5425, "eval_samples_per_second": 1.819, "eval_steps_per_second": 0.26, "step": 3185 }, { "epoch": 1.58, "learning_rate": 1.1809539419767251e-05, "loss": 1.5273, "step": 3220 }, { "epoch": 1.58, "eval_loss": 1.416408658027649, "eval_runtime": 11.5376, "eval_samples_per_second": 1.82, "eval_steps_per_second": 0.26, "step": 3220 } ], "logging_steps": 35, "max_steps": 6102, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 250, "total_flos": 1.093574423789568e+18, "train_batch_size": 6, "trial_name": null, "trial_params": null }