{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09528346831824679, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0019056693663649356, "eval_loss": 3.0111310482025146, "eval_runtime": 139.5262, "eval_samples_per_second": 6.336, "eval_steps_per_second": 0.796, "step": 1 }, { "epoch": 0.005717008099094807, "grad_norm": 1.745223045349121, "learning_rate": 3e-05, "loss": 2.8529, "step": 3 }, { "epoch": 0.009528346831824679, "eval_loss": 2.849209785461426, "eval_runtime": 140.6511, "eval_samples_per_second": 6.285, "eval_steps_per_second": 0.789, "step": 5 }, { "epoch": 0.011434016198189614, "grad_norm": 1.5631052255630493, "learning_rate": 6e-05, "loss": 2.8163, "step": 6 }, { "epoch": 0.01715102429728442, "grad_norm": 1.3446643352508545, "learning_rate": 9e-05, "loss": 2.5454, "step": 9 }, { "epoch": 0.019056693663649357, "eval_loss": 2.5639851093292236, "eval_runtime": 140.6263, "eval_samples_per_second": 6.286, "eval_steps_per_second": 0.789, "step": 10 }, { "epoch": 0.022868032396379228, "grad_norm": 1.6127707958221436, "learning_rate": 9.938441702975689e-05, "loss": 2.5243, "step": 12 }, { "epoch": 0.028585040495474036, "grad_norm": 1.2866557836532593, "learning_rate": 9.619397662556435e-05, "loss": 2.3552, "step": 15 }, { "epoch": 0.028585040495474036, "eval_loss": 2.4209542274475098, "eval_runtime": 140.5703, "eval_samples_per_second": 6.289, "eval_steps_per_second": 0.79, "step": 15 }, { "epoch": 0.03430204859456884, "grad_norm": 1.7916969060897827, "learning_rate": 9.045084971874738e-05, "loss": 2.6218, "step": 18 }, { "epoch": 0.038113387327298714, "eval_loss": 2.3375890254974365, "eval_runtime": 140.5851, "eval_samples_per_second": 6.288, "eval_steps_per_second": 0.79, "step": 20 }, { "epoch": 0.04001905669366365, "grad_norm": 1.3049689531326294, "learning_rate": 8.247240241650918e-05, "loss": 2.2512, "step": 21 }, { "epoch": 0.045736064792758456, "grad_norm": 1.2573634386062622, "learning_rate": 7.269952498697734e-05, "loss": 2.2554, "step": 24 }, { "epoch": 0.04764173415912339, "eval_loss": 2.2932698726654053, "eval_runtime": 140.5269, "eval_samples_per_second": 6.291, "eval_steps_per_second": 0.79, "step": 25 }, { "epoch": 0.05145307289185326, "grad_norm": 1.093232274055481, "learning_rate": 6.167226819279528e-05, "loss": 2.1642, "step": 27 }, { "epoch": 0.05717008099094807, "grad_norm": 1.1265302896499634, "learning_rate": 5e-05, "loss": 2.2761, "step": 30 }, { "epoch": 0.05717008099094807, "eval_loss": 2.271261215209961, "eval_runtime": 140.6344, "eval_samples_per_second": 6.286, "eval_steps_per_second": 0.789, "step": 30 }, { "epoch": 0.06288708909004288, "grad_norm": 3.7034900188446045, "learning_rate": 3.832773180720475e-05, "loss": 2.2029, "step": 33 }, { "epoch": 0.06669842782277274, "eval_loss": 2.2650718688964844, "eval_runtime": 140.6225, "eval_samples_per_second": 6.286, "eval_steps_per_second": 0.789, "step": 35 }, { "epoch": 0.06860409718913768, "grad_norm": 1.4284329414367676, "learning_rate": 2.7300475013022663e-05, "loss": 2.2225, "step": 36 }, { "epoch": 0.07432110528823249, "grad_norm": 1.1168383359909058, "learning_rate": 1.7527597583490822e-05, "loss": 2.108, "step": 39 }, { "epoch": 0.07622677465459743, "eval_loss": 2.249340534210205, "eval_runtime": 140.5814, "eval_samples_per_second": 6.288, "eval_steps_per_second": 0.79, "step": 40 }, { "epoch": 0.0800381133873273, "grad_norm": 0.9819179773330688, "learning_rate": 9.549150281252633e-06, "loss": 2.1547, "step": 42 }, { "epoch": 0.0857551214864221, "grad_norm": 0.888201892375946, "learning_rate": 3.8060233744356633e-06, "loss": 2.1645, "step": 45 }, { "epoch": 0.0857551214864221, "eval_loss": 2.2474822998046875, "eval_runtime": 140.5849, "eval_samples_per_second": 6.288, "eval_steps_per_second": 0.79, "step": 45 }, { "epoch": 0.09147212958551691, "grad_norm": 1.5101863145828247, "learning_rate": 6.15582970243117e-07, "loss": 2.1671, "step": 48 }, { "epoch": 0.09528346831824679, "eval_loss": 2.2469546794891357, "eval_runtime": 140.6128, "eval_samples_per_second": 6.287, "eval_steps_per_second": 0.789, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.508513578745856e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }