{ "best_metric": 1.061107873916626, "best_model_checkpoint": "saves/LLaMA2-13B/lora/train_2024-01-03-02-43-22/checkpoint-400", "epoch": 2.9966850828729283, "eval_steps": 100, "global_step": 678, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019997316318671806, "loss": 1.4828, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00019989266715116316, "loss": 1.1204, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.00019975855509847686, "loss": 1.1449, "step": 15 }, { "epoch": 0.09, "learning_rate": 0.00019957089901146148, "loss": 1.1126, "step": 20 }, { "epoch": 0.11, "learning_rate": 0.00019932979961194435, "loss": 1.1048, "step": 25 }, { "epoch": 0.13, "learning_rate": 0.0001990353863067169, "loss": 1.2161, "step": 30 }, { "epoch": 0.15, "learning_rate": 0.00019868781711807705, "loss": 1.0051, "step": 35 }, { "epoch": 0.18, "learning_rate": 0.00019828727859901317, "loss": 1.0314, "step": 40 }, { "epoch": 0.2, "learning_rate": 0.00019783398573307428, "loss": 1.0718, "step": 45 }, { "epoch": 0.22, "learning_rate": 0.00019732818181898045, "loss": 1.1571, "step": 50 }, { "epoch": 0.24, "learning_rate": 0.0001967701383400357, "loss": 1.0602, "step": 55 }, { "epoch": 0.27, "learning_rate": 0.0001961601548184129, "loss": 1.1215, "step": 60 }, { "epoch": 0.29, "learning_rate": 0.00019549855865438965, "loss": 1.1434, "step": 65 }, { "epoch": 0.31, "learning_rate": 0.00019478570495062037, "loss": 1.0844, "step": 70 }, { "epoch": 0.33, "learning_rate": 0.00019402197632153992, "loss": 1.1123, "step": 75 }, { "epoch": 0.35, "learning_rate": 0.00019320778268800066, "loss": 1.1054, "step": 80 }, { "epoch": 0.38, "learning_rate": 0.00019234356105725297, "loss": 1.0368, "step": 85 }, { "epoch": 0.4, "learning_rate": 0.00019142977528838762, "loss": 1.1195, "step": 90 }, { "epoch": 0.42, "learning_rate": 0.00019046691584336577, "loss": 1.0778, "step": 95 }, { "epoch": 0.44, "learning_rate": 0.0001894554995237703, "loss": 1.0228, "step": 100 }, { "epoch": 0.44, "eval_loss": 1.0887844562530518, "eval_runtime": 75.527, "eval_samples_per_second": 11.982, "eval_steps_per_second": 1.509, "step": 100 }, { "epoch": 0.46, "learning_rate": 0.0001883960691934196, "loss": 1.1156, "step": 105 }, { "epoch": 0.49, "learning_rate": 0.00018728919348699283, "loss": 1.0259, "step": 110 }, { "epoch": 0.51, "learning_rate": 0.00018613546650482322, "loss": 1.0112, "step": 115 }, { "epoch": 0.53, "learning_rate": 0.00018493550749402278, "loss": 1.0279, "step": 120 }, { "epoch": 0.55, "learning_rate": 0.00018368996051610986, "loss": 1.105, "step": 125 }, { "epoch": 0.57, "learning_rate": 0.00018239949410131802, "loss": 1.041, "step": 130 }, { "epoch": 0.6, "learning_rate": 0.00018106480088977172, "loss": 1.0475, "step": 135 }, { "epoch": 0.62, "learning_rate": 0.00017968659725972112, "loss": 1.0241, "step": 140 }, { "epoch": 0.64, "learning_rate": 0.00017826562294303585, "loss": 1.0071, "step": 145 }, { "epoch": 0.66, "learning_rate": 0.0001768026406281642, "loss": 0.9714, "step": 150 }, { "epoch": 0.69, "learning_rate": 0.00017529843555077066, "loss": 1.0376, "step": 155 }, { "epoch": 0.71, "learning_rate": 0.00017375381507227108, "loss": 0.9806, "step": 160 }, { "epoch": 0.73, "learning_rate": 0.00017216960824649303, "loss": 1.1022, "step": 165 }, { "epoch": 0.75, "learning_rate": 0.00017054666537469213, "loss": 0.9723, "step": 170 }, { "epoch": 0.77, "learning_rate": 0.00016888585754916476, "loss": 1.1081, "step": 175 }, { 
"epoch": 0.8, "learning_rate": 0.00016718807618570106, "loss": 1.0525, "step": 180 }, { "epoch": 0.82, "learning_rate": 0.00016545423254513004, "loss": 1.0761, "step": 185 }, { "epoch": 0.84, "learning_rate": 0.00016368525724421248, "loss": 1.1493, "step": 190 }, { "epoch": 0.86, "learning_rate": 0.00016188209975614542, "loss": 0.9942, "step": 195 }, { "epoch": 0.88, "learning_rate": 0.00016004572790094535, "loss": 1.0068, "step": 200 }, { "epoch": 0.88, "eval_loss": 1.0737706422805786, "eval_runtime": 75.4895, "eval_samples_per_second": 11.988, "eval_steps_per_second": 1.51, "step": 200 }, { "epoch": 0.91, "learning_rate": 0.00015817712732598413, "loss": 1.0579, "step": 205 }, { "epoch": 0.93, "learning_rate": 0.00015627730097695638, "loss": 1.0985, "step": 210 }, { "epoch": 0.95, "learning_rate": 0.00015434726855956206, "loss": 1.071, "step": 215 }, { "epoch": 0.97, "learning_rate": 0.00015238806599219336, "loss": 1.0617, "step": 220 }, { "epoch": 0.99, "learning_rate": 0.00015040074484992, "loss": 0.9789, "step": 225 }, { "epoch": 1.02, "learning_rate": 0.00014838637180007047, "loss": 1.072, "step": 230 }, { "epoch": 1.04, "learning_rate": 0.00014634602802971312, "loss": 1.0098, "step": 235 }, { "epoch": 1.06, "learning_rate": 0.00014428080866534396, "loss": 1.0222, "step": 240 }, { "epoch": 1.08, "learning_rate": 0.0001421918221850923, "loss": 0.9734, "step": 245 }, { "epoch": 1.1, "learning_rate": 0.00014008018982376044, "loss": 0.9364, "step": 250 }, { "epoch": 1.13, "learning_rate": 0.00013794704497101655, "loss": 0.9735, "step": 255 }, { "epoch": 1.15, "learning_rate": 0.00013579353256306287, "loss": 1.0933, "step": 260 }, { "epoch": 1.17, "learning_rate": 0.00013362080846810725, "loss": 0.9764, "step": 265 }, { "epoch": 1.19, "learning_rate": 0.00013143003886596669, "loss": 1.0584, "step": 270 }, { "epoch": 1.22, "learning_rate": 0.00012922239962213637, "loss": 0.9116, "step": 275 }, { "epoch": 1.24, "learning_rate": 0.00012699907565665982, "loss": 1.0587, "step": 280 }, { "epoch": 1.26, "learning_rate": 0.00012476126030813963, "loss": 0.9342, "step": 285 }, { "epoch": 1.28, "learning_rate": 0.00012251015469322916, "loss": 0.9826, "step": 290 }, { "epoch": 1.3, "learning_rate": 0.00012024696706194967, "loss": 1.0656, "step": 295 }, { "epoch": 1.33, "learning_rate": 0.00011797291214917881, "loss": 1.0218, "step": 300 }, { "epoch": 1.33, "eval_loss": 1.06796395778656, "eval_runtime": 75.5412, "eval_samples_per_second": 11.98, "eval_steps_per_second": 1.509, "step": 300 }, { "epoch": 1.35, "learning_rate": 0.00011568921052265836, "loss": 0.9915, "step": 305 }, { "epoch": 1.37, "learning_rate": 0.00011339708792787119, "loss": 0.9592, "step": 310 }, { "epoch": 1.39, "learning_rate": 0.00011109777463013915, "loss": 0.9875, "step": 315 }, { "epoch": 1.41, "learning_rate": 0.00010879250475429523, "loss": 1.1583, "step": 320 }, { "epoch": 1.44, "learning_rate": 0.00010648251562228386, "loss": 0.977, "step": 325 }, { "epoch": 1.46, "learning_rate": 0.00010416904708904548, "loss": 1.041, "step": 330 }, { "epoch": 1.48, "learning_rate": 0.00010185334087704124, "loss": 0.972, "step": 335 }, { "epoch": 1.5, "learning_rate": 9.953663990977568e-05, "loss": 1.0115, "step": 340 }, { "epoch": 1.52, "learning_rate": 9.722018764467461e-05, "loss": 1.0078, "step": 345 }, { "epoch": 1.55, "learning_rate": 9.490522740567633e-05, "loss": 0.955, "step": 350 }, { "epoch": 1.57, "learning_rate": 9.259300171589456e-05, "loss": 0.9719, "step": 355 }, { "epoch": 1.59, "learning_rate": 9.028475163071141e-05, 
"loss": 0.871, "step": 360 }, { "epoch": 1.61, "learning_rate": 8.798171607165778e-05, "loss": 0.9568, "step": 365 }, { "epoch": 1.64, "learning_rate": 8.568513116143919e-05, "loss": 0.9466, "step": 370 }, { "epoch": 1.66, "learning_rate": 8.339622956046417e-05, "loss": 0.8967, "step": 375 }, { "epoch": 1.68, "learning_rate": 8.111623980523035e-05, "loss": 0.9465, "step": 380 }, { "epoch": 1.7, "learning_rate": 7.884638564892472e-05, "loss": 0.9499, "step": 385 }, { "epoch": 1.72, "learning_rate": 7.658788540459062e-05, "loss": 0.8955, "step": 390 }, { "epoch": 1.75, "learning_rate": 7.434195129121518e-05, "loss": 0.8982, "step": 395 }, { "epoch": 1.77, "learning_rate": 7.210978878308729e-05, "loss": 1.084, "step": 400 }, { "epoch": 1.77, "eval_loss": 1.061107873916626, "eval_runtime": 75.5635, "eval_samples_per_second": 11.977, "eval_steps_per_second": 1.509, "step": 400 }, { "epoch": 1.79, "learning_rate": 6.989259596277582e-05, "loss": 0.8936, "step": 405 }, { "epoch": 1.81, "learning_rate": 6.76915628780754e-05, "loss": 0.8794, "step": 410 }, { "epoch": 1.83, "learning_rate": 6.55078709032644e-05, "loss": 0.9631, "step": 415 }, { "epoch": 1.86, "learning_rate": 6.334269210501875e-05, "loss": 0.9636, "step": 420 }, { "epoch": 1.88, "learning_rate": 6.119718861332098e-05, "loss": 0.9761, "step": 425 }, { "epoch": 1.9, "learning_rate": 5.9072511997703226e-05, "loss": 0.9732, "step": 430 }, { "epoch": 1.92, "learning_rate": 5.696980264915777e-05, "loss": 0.995, "step": 435 }, { "epoch": 1.94, "learning_rate": 5.489018916804813e-05, "loss": 1.0037, "step": 440 }, { "epoch": 1.97, "learning_rate": 5.283478775834811e-05, "loss": 0.8503, "step": 445 }, { "epoch": 1.99, "learning_rate": 5.080470162853472e-05, "loss": 0.8914, "step": 450 }, { "epoch": 2.01, "learning_rate": 4.880102039945624e-05, "loss": 0.9531, "step": 455 }, { "epoch": 2.03, "learning_rate": 4.6824819519493057e-05, "loss": 0.9294, "step": 460 }, { "epoch": 2.06, "learning_rate": 4.487715968732568e-05, "loss": 0.8584, "step": 465 }, { "epoch": 2.08, "learning_rate": 4.29590862826191e-05, "loss": 0.9285, "step": 470 }, { "epoch": 2.1, "learning_rate": 4.107162880492984e-05, "loss": 0.8742, "step": 475 }, { "epoch": 2.12, "learning_rate": 3.921580032113602e-05, "loss": 0.7903, "step": 480 }, { "epoch": 2.14, "learning_rate": 3.739259692168764e-05, "loss": 0.8287, "step": 485 }, { "epoch": 2.17, "learning_rate": 3.560299718596889e-05, "loss": 0.8967, "step": 490 }, { "epoch": 2.19, "learning_rate": 3.3847961657058845e-05, "loss": 0.841, "step": 495 }, { "epoch": 2.21, "learning_rate": 3.212843232617343e-05, "loss": 0.897, "step": 500 }, { "epoch": 2.21, "eval_loss": 1.0756696462631226, "eval_runtime": 75.5409, "eval_samples_per_second": 11.98, "eval_steps_per_second": 1.509, "step": 500 }, { "epoch": 2.23, "learning_rate": 3.0445332127064275e-05, "loss": 0.9455, "step": 505 }, { "epoch": 2.25, "learning_rate": 2.879956444064703e-05, "loss": 0.9292, "step": 510 }, { "epoch": 2.28, "learning_rate": 2.7192012610123774e-05, "loss": 0.9016, "step": 515 }, { "epoch": 2.3, "learning_rate": 2.5623539466860813e-05, "loss": 0.937, "step": 520 }, { "epoch": 2.32, "learning_rate": 2.409498686727587e-05, "loss": 0.8348, "step": 525 }, { "epoch": 2.34, "learning_rate": 2.2607175240983026e-05, "loss": 0.8387, "step": 530 }, { "epoch": 2.36, "learning_rate": 2.1160903150438605e-05, "loss": 0.7736, "step": 535 }, { "epoch": 2.39, "learning_rate": 1.9756946862323535e-05, "loss": 0.8672, "step": 540 }, { "epoch": 2.41, "learning_rate": 
1.839605993089307e-05, "loss": 0.8688, "step": 545 }, { "epoch": 2.43, "learning_rate": 1.707897279351671e-05, "loss": 0.8631, "step": 550 }, { "epoch": 2.45, "learning_rate": 1.580639237862608e-05, "loss": 0.9171, "step": 555 }, { "epoch": 2.48, "learning_rate": 1.4579001726280828e-05, "loss": 0.9134, "step": 560 }, { "epoch": 2.5, "learning_rate": 1.339745962155613e-05, "loss": 0.8363, "step": 565 }, { "epoch": 2.52, "learning_rate": 1.2262400240949023e-05, "loss": 0.8862, "step": 570 }, { "epoch": 2.54, "learning_rate": 1.1174432811992685e-05, "loss": 0.9154, "step": 575 }, { "epoch": 2.56, "learning_rate": 1.013414128626211e-05, "loss": 0.9423, "step": 580 }, { "epoch": 2.59, "learning_rate": 9.142084025945984e-06, "loss": 0.8462, "step": 585 }, { "epoch": 2.61, "learning_rate": 8.19879350415349e-06, "loss": 0.8019, "step": 590 }, { "epoch": 2.63, "learning_rate": 7.30477601911671e-06, "loss": 0.861, "step": 595 }, { "epoch": 2.65, "learning_rate": 6.460511422441984e-06, "loss": 0.9597, "step": 600 }, { "epoch": 2.65, "eval_loss": 1.0745042562484741, "eval_runtime": 75.5387, "eval_samples_per_second": 11.981, "eval_steps_per_second": 1.509, "step": 600 }, { "epoch": 2.67, "learning_rate": 5.66645286155616e-06, "loss": 0.7797, "step": 605 }, { "epoch": 2.7, "learning_rate": 4.923026536485875e-06, "loss": 0.9222, "step": 610 }, { "epoch": 2.72, "learning_rate": 4.230631471100655e-06, "loss": 0.9732, "step": 615 }, { "epoch": 2.74, "learning_rate": 3.5896392989422377e-06, "loss": 0.7366, "step": 620 }, { "epoch": 2.76, "learning_rate": 3.000394063755396e-06, "loss": 0.9243, "step": 625 }, { "epoch": 2.78, "learning_rate": 2.4632120348272003e-06, "loss": 0.9263, "step": 630 }, { "epoch": 2.81, "learning_rate": 1.9783815372338423e-06, "loss": 0.8541, "step": 635 }, { "epoch": 2.83, "learning_rate": 1.5461627970860814e-06, "loss": 0.828, "step": 640 }, { "epoch": 2.85, "learning_rate": 1.1667878018564171e-06, "loss": 0.8446, "step": 645 }, { "epoch": 2.87, "learning_rate": 8.404601758630892e-07, "loss": 0.7749, "step": 650 }, { "epoch": 2.9, "learning_rate": 5.673550709774267e-07, "loss": 0.8738, "step": 655 }, { "epoch": 2.92, "learning_rate": 3.4761907261356976e-07, "loss": 0.895, "step": 660 }, { "epoch": 2.94, "learning_rate": 1.813701210506946e-07, "loss": 0.8552, "step": 665 }, { "epoch": 2.96, "learning_rate": 6.869744813023937e-08, "loss": 0.8475, "step": 670 }, { "epoch": 2.98, "learning_rate": 9.661529361892907e-09, "loss": 0.9076, "step": 675 }, { "epoch": 3.0, "step": 678, "total_flos": 1.5685942606774272e+17, "train_loss": 0.9757168982232918, "train_runtime": 3618.1154, "train_samples_per_second": 3.0, "train_steps_per_second": 0.187 } ], "logging_steps": 5, "max_steps": 678, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.5685942606774272e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }
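The object above has the shape of a Hugging Face Trainer `trainer_state.json` (this one from a LoRA fine-tune of LLaMA2-13B, saved under `saves/LLaMA2-13B/lora/train_2024-01-03-02-43-22`). Below is a minimal Python sketch for reading it, assuming the JSON is saved locally as `trainer_state.json` (the filename is an assumption, not stated above): it splits `log_history` into training-loss and evaluation entries and locates the lowest `eval_loss`, which for this run reproduces `best_metric` (about 1.0611 at step 400, i.e. `checkpoint-400`).

import json

# Load the trainer state (path is assumed; adjust to wherever this file lives).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss";
# the final summary entry carries "train_loss" and is excluded by both filters.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

best = min(eval_log, key=lambda e: e["eval_loss"])
print(f"{len(train_log)} training points, {len(eval_log)} evaluation points")
print(f"lowest eval_loss {best['eval_loss']:.4f} at step {best['step']}")  # ~1.0611 at step 400

The same dictionaries can be fed to a plotting library of choice (e.g. step vs. loss and step vs. eval_loss) to visualize the gap between the falling training loss (1.48 down to ~0.85) and the evaluation loss, which bottoms out at step 400 and drifts slightly upward afterwards.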