{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.89247311827957, "eval_steps": 500, "global_step": 460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21505376344086022, "grad_norm": NaN, "learning_rate": 0.00019976687691905393, "loss": 0.0, "step": 10 }, { "epoch": 0.43010752688172044, "grad_norm": NaN, "learning_rate": 0.00019906859460363307, "loss": 0.0, "step": 20 }, { "epoch": 0.6451612903225806, "grad_norm": NaN, "learning_rate": 0.00019790840876823232, "loss": 0.0, "step": 30 }, { "epoch": 0.8602150537634409, "grad_norm": NaN, "learning_rate": 0.00019629172873477995, "loss": 0.0, "step": 40 }, { "epoch": 1.075268817204301, "grad_norm": NaN, "learning_rate": 0.00019422609221188207, "loss": 0.0, "step": 50 }, { "epoch": 1.2903225806451613, "grad_norm": NaN, "learning_rate": 0.00019172113015054532, "loss": 0.0, "step": 60 }, { "epoch": 1.5053763440860215, "grad_norm": NaN, "learning_rate": 0.0001887885218402375, "loss": 0.0, "step": 70 }, { "epoch": 1.7204301075268817, "grad_norm": NaN, "learning_rate": 0.00018544194045464886, "loss": 0.0, "step": 80 }, { "epoch": 1.935483870967742, "grad_norm": NaN, "learning_rate": 0.0001816969893010442, "loss": 0.0, "step": 90 }, { "epoch": 2.150537634408602, "grad_norm": NaN, "learning_rate": 0.000177571129070442, "loss": 0.0, "step": 100 }, { "epoch": 2.3655913978494625, "grad_norm": NaN, "learning_rate": 0.00017308359642781242, "loss": 0.0, "step": 110 }, { "epoch": 2.5806451612903225, "grad_norm": NaN, "learning_rate": 0.00016825531432186543, "loss": 0.0, "step": 120 }, { "epoch": 2.795698924731183, "grad_norm": NaN, "learning_rate": 0.00016310879443260528, "loss": 0.0, "step": 130 }, { "epoch": 3.010752688172043, "grad_norm": NaN, "learning_rate": 0.00015766803221148673, "loss": 0.0, "step": 140 }, { "epoch": 3.225806451612903, "grad_norm": NaN, "learning_rate": 0.00015195839500354335, "loss": 0.0, "step": 150 }, { "epoch": 3.4408602150537635, "grad_norm": NaN, "learning_rate": 0.00014600650377311522, "loss": 0.0, "step": 160 }, { "epoch": 3.6559139784946235, "grad_norm": NaN, "learning_rate": 0.00013984010898462416, "loss": 0.0, "step": 170 }, { "epoch": 3.870967741935484, "grad_norm": NaN, "learning_rate": 0.00013348796121709862, "loss": 0.0, "step": 180 }, { "epoch": 4.086021505376344, "grad_norm": NaN, "learning_rate": 0.00012697967711570242, "loss": 0.0, "step": 190 }, { "epoch": 4.301075268817204, "grad_norm": NaN, "learning_rate": 0.0001203456013052634, "loss": 0.0, "step": 200 }, { "epoch": 4.516129032258064, "grad_norm": NaN, "learning_rate": 0.00011361666490962468, "loss": 0.0, "step": 210 }, { "epoch": 4.731182795698925, "grad_norm": NaN, "learning_rate": 0.0001068242413364671, "loss": 0.0, "step": 220 }, { "epoch": 4.946236559139785, "grad_norm": NaN, "learning_rate": 0.0001, "loss": 0.0, "step": 230 }, { "epoch": 5.161290322580645, "grad_norm": NaN, "learning_rate": 9.317575866353292e-05, "loss": 0.0, "step": 240 }, { "epoch": 5.376344086021505, "grad_norm": NaN, "learning_rate": 8.638333509037536e-05, "loss": 0.0, "step": 250 }, { "epoch": 5.591397849462366, "grad_norm": NaN, "learning_rate": 7.965439869473664e-05, "loss": 0.0, "step": 260 }, { "epoch": 5.806451612903226, "grad_norm": NaN, "learning_rate": 7.302032288429756e-05, "loss": 0.0, "step": 270 }, { "epoch": 6.021505376344086, "grad_norm": NaN, "learning_rate": 6.651203878290139e-05, "loss": 0.0, "step": 280 }, { "epoch": 6.236559139784946, "grad_norm": NaN, "learning_rate": 6.015989101537586e-05, "loss": 0.0, "step": 290 }, { "epoch": 6.451612903225806, "grad_norm": NaN, "learning_rate": 5.399349622688479e-05, "loss": 0.0, "step": 300 }, { "epoch": 6.666666666666667, "grad_norm": NaN, "learning_rate": 4.804160499645667e-05, "loss": 0.0, "step": 310 }, { "epoch": 6.881720430107527, "grad_norm": NaN, "learning_rate": 4.2331967788513295e-05, "loss": 0.0, "step": 320 }, { "epoch": 7.096774193548387, "grad_norm": NaN, "learning_rate": 3.689120556739475e-05, "loss": 0.0, "step": 330 }, { "epoch": 7.311827956989247, "grad_norm": NaN, "learning_rate": 3.174468567813461e-05, "loss": 0.0, "step": 340 }, { "epoch": 7.526881720430108, "grad_norm": NaN, "learning_rate": 2.691640357218759e-05, "loss": 0.0, "step": 350 }, { "epoch": 7.741935483870968, "grad_norm": NaN, "learning_rate": 2.242887092955801e-05, "loss": 0.0, "step": 360 }, { "epoch": 7.956989247311828, "grad_norm": NaN, "learning_rate": 1.8303010698955804e-05, "loss": 0.0, "step": 370 }, { "epoch": 8.172043010752688, "grad_norm": NaN, "learning_rate": 1.4558059545351143e-05, "loss": 0.0, "step": 380 }, { "epoch": 8.387096774193548, "grad_norm": NaN, "learning_rate": 1.1211478159762478e-05, "loss": 0.0, "step": 390 }, { "epoch": 8.602150537634408, "grad_norm": NaN, "learning_rate": 8.278869849454718e-06, "loss": 0.0, "step": 400 }, { "epoch": 8.817204301075268, "grad_norm": NaN, "learning_rate": 5.77390778811796e-06, "loss": 0.0, "step": 410 }, { "epoch": 9.03225806451613, "grad_norm": NaN, "learning_rate": 3.7082712652200867e-06, "loss": 0.0, "step": 420 }, { "epoch": 9.24731182795699, "grad_norm": NaN, "learning_rate": 2.091591231767709e-06, "loss": 0.0, "step": 430 }, { "epoch": 9.46236559139785, "grad_norm": NaN, "learning_rate": 9.314053963669245e-07, "loss": 0.0, "step": 440 }, { "epoch": 9.67741935483871, "grad_norm": NaN, "learning_rate": 2.3312308094607382e-07, "loss": 0.0, "step": 450 }, { "epoch": 9.89247311827957, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 460 }, { "epoch": 9.89247311827957, "step": 460, "total_flos": 2.262683294367744e+16, "train_loss": 0.0, "train_runtime": 377.8672, "train_samples_per_second": 4.922, "train_steps_per_second": 1.217 } ], "logging_steps": 10, "max_steps": 460, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.262683294367744e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }