{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 8240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12135922330097088, "grad_norm": 1.133748173713684, "learning_rate": 9.696601941747573e-05, "loss": 1.076, "step": 250 }, { "epoch": 0.24271844660194175, "grad_norm": 0.9746648073196411, "learning_rate": 9.393203883495146e-05, "loss": 0.571, "step": 500 }, { "epoch": 0.3640776699029126, "grad_norm": 0.7970510125160217, "learning_rate": 9.089805825242718e-05, "loss": 0.5098, "step": 750 }, { "epoch": 0.4854368932038835, "grad_norm": 0.6922478079795837, "learning_rate": 8.786407766990292e-05, "loss": 0.4863, "step": 1000 }, { "epoch": 0.6067961165048543, "grad_norm": 0.6843756437301636, "learning_rate": 8.483009708737865e-05, "loss": 0.4761, "step": 1250 }, { "epoch": 0.7281553398058253, "grad_norm": 0.872114896774292, "learning_rate": 8.179611650485438e-05, "loss": 0.4555, "step": 1500 }, { "epoch": 0.8495145631067961, "grad_norm": 0.9166551232337952, "learning_rate": 7.87621359223301e-05, "loss": 0.4359, "step": 1750 }, { "epoch": 0.970873786407767, "grad_norm": 1.0107553005218506, "learning_rate": 7.572815533980583e-05, "loss": 0.4104, "step": 2000 }, { "epoch": 1.0922330097087378, "grad_norm": 1.0530173778533936, "learning_rate": 7.269417475728155e-05, "loss": 0.3704, "step": 2250 }, { "epoch": 1.2135922330097086, "grad_norm": 1.1713433265686035, "learning_rate": 6.966019417475728e-05, "loss": 0.3416, "step": 2500 }, { "epoch": 1.3349514563106797, "grad_norm": 0.9982771277427673, "learning_rate": 6.662621359223301e-05, "loss": 0.319, "step": 2750 }, { "epoch": 1.4563106796116505, "grad_norm": 1.0818179845809937, "learning_rate": 6.359223300970875e-05, "loss": 0.2962, "step": 3000 }, { "epoch": 1.5776699029126213, "grad_norm": 1.1374905109405518, "learning_rate": 6.055825242718447e-05, "loss": 0.2797, "step": 3250 }, { "epoch": 1.6990291262135924, "grad_norm": 1.12690007686615, "learning_rate": 5.752427184466019e-05, "loss": 0.2663, "step": 3500 }, { "epoch": 1.820388349514563, "grad_norm": 1.0406389236450195, "learning_rate": 5.4490291262135926e-05, "loss": 0.2563, "step": 3750 }, { "epoch": 1.941747572815534, "grad_norm": 0.8830968737602234, "learning_rate": 5.145631067961165e-05, "loss": 0.2461, "step": 4000 }, { "epoch": 2.063106796116505, "grad_norm": 1.0481204986572266, "learning_rate": 4.8422330097087385e-05, "loss": 0.2234, "step": 4250 }, { "epoch": 2.1844660194174756, "grad_norm": 1.004947543144226, "learning_rate": 4.538834951456311e-05, "loss": 0.2099, "step": 4500 }, { "epoch": 2.3058252427184467, "grad_norm": 0.9718777537345886, "learning_rate": 4.235436893203884e-05, "loss": 0.2075, "step": 4750 }, { "epoch": 2.4271844660194173, "grad_norm": 1.0882912874221802, "learning_rate": 3.9320388349514564e-05, "loss": 0.2038, "step": 5000 }, { "epoch": 2.5485436893203883, "grad_norm": 0.9357613921165466, "learning_rate": 3.62864077669903e-05, "loss": 0.2004, "step": 5250 }, { "epoch": 2.6699029126213594, "grad_norm": 1.0756570100784302, "learning_rate": 3.325242718446602e-05, "loss": 0.1951, "step": 5500 }, { "epoch": 2.79126213592233, "grad_norm": 0.9342887997627258, "learning_rate": 3.0218446601941746e-05, "loss": 0.1943, "step": 5750 }, { "epoch": 2.912621359223301, "grad_norm": 0.865288496017456, "learning_rate": 2.7184466019417475e-05, "loss": 0.1889, "step": 6000 }, { "epoch": 3.033980582524272, "grad_norm": 0.807023823261261, "learning_rate": 2.4150485436893205e-05, "loss": 0.1811, "step": 6250 }, { "epoch": 3.1553398058252426, "grad_norm": 0.8565006256103516, "learning_rate": 2.111650485436893e-05, "loss": 0.1698, "step": 6500 }, { "epoch": 3.2766990291262137, "grad_norm": 0.9177230596542358, "learning_rate": 1.808252427184466e-05, "loss": 0.1684, "step": 6750 }, { "epoch": 3.3980582524271843, "grad_norm": 1.0113624334335327, "learning_rate": 1.5048543689320387e-05, "loss": 0.1674, "step": 7000 }, { "epoch": 3.5194174757281553, "grad_norm": 0.7621839046478271, "learning_rate": 1.2014563106796117e-05, "loss": 0.1666, "step": 7250 }, { "epoch": 3.6407766990291264, "grad_norm": 0.872179388999939, "learning_rate": 8.980582524271845e-06, "loss": 0.1652, "step": 7500 }, { "epoch": 3.762135922330097, "grad_norm": 0.8785023093223572, "learning_rate": 5.946601941747574e-06, "loss": 0.1643, "step": 7750 }, { "epoch": 3.883495145631068, "grad_norm": 0.8887606859207153, "learning_rate": 2.912621359223301e-06, "loss": 0.1645, "step": 8000 }, { "epoch": 4.0, "step": 8240, "total_flos": 2.94256216740778e+18, "train_loss": 0.30107544000866343, "train_runtime": 47583.3127, "train_samples_per_second": 11.083, "train_steps_per_second": 0.173 } ], "logging_steps": 250, "max_steps": 8240, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.94256216740778e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }