{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 77, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012987012987012988, "grad_norm": 2.5498344898223877, "learning_rate": 2.5e-05, "loss": 4.7663, "step": 1 }, { "epoch": 0.06493506493506493, "grad_norm": 2.774350881576538, "learning_rate": 0.000125, "loss": 4.7043, "step": 5 }, { "epoch": 0.12987012987012986, "grad_norm": 3.9560413360595703, "learning_rate": 0.00019958568425315314, "loss": 4.0675, "step": 10 }, { "epoch": 0.19480519480519481, "grad_norm": 3.9846060276031494, "learning_rate": 0.00019496396989003193, "loss": 3.0353, "step": 15 }, { "epoch": 0.2597402597402597, "grad_norm": 2.2558484077453613, "learning_rate": 0.00018544194045464886, "loss": 2.3784, "step": 20 }, { "epoch": 0.3246753246753247, "grad_norm": 0.9705212712287903, "learning_rate": 0.00017151095013548994, "loss": 1.9776, "step": 25 }, { "epoch": 0.38961038961038963, "grad_norm": 0.6810367107391357, "learning_rate": 0.00015388986359155758, "loss": 1.695, "step": 30 }, { "epoch": 0.45454545454545453, "grad_norm": 0.5922189950942993, "learning_rate": 0.00013348796121709862, "loss": 1.5304, "step": 35 }, { "epoch": 0.5194805194805194, "grad_norm": 0.5001540780067444, "learning_rate": 0.00011135801860504749, "loss": 1.4319, "step": 40 }, { "epoch": 0.5844155844155844, "grad_norm": 0.4108904302120209, "learning_rate": 8.86419813949525e-05, "loss": 1.3664, "step": 45 }, { "epoch": 0.6493506493506493, "grad_norm": 0.4516855776309967, "learning_rate": 6.651203878290139e-05, "loss": 1.3354, "step": 50 }, { "epoch": 0.7142857142857143, "grad_norm": 0.3778669238090515, "learning_rate": 4.611013640844245e-05, "loss": 1.309, "step": 55 }, { "epoch": 0.7792207792207793, "grad_norm": 0.3914947509765625, "learning_rate": 2.8489049864510054e-05, "loss": 1.2981, "step": 60 }, { "epoch": 0.8441558441558441, "grad_norm": 0.370316743850708, "learning_rate": 1.4558059545351143e-05, "loss": 1.2884, "step": 65 }, { "epoch": 0.9090909090909091, "grad_norm": 0.3161383867263794, "learning_rate": 5.036030109968082e-06, "loss": 1.2853, "step": 70 }, { "epoch": 0.974025974025974, "grad_norm": 0.29596462845802307, "learning_rate": 4.143157468468717e-07, "loss": 1.2788, "step": 75 }, { "epoch": 1.0, "eval_loss": 1.8376365900039673, "eval_runtime": 2.2411, "eval_samples_per_second": 6.247, "eval_steps_per_second": 0.446, "step": 77 }, { "epoch": 1.0, "step": 77, "total_flos": 1.5573103901178593e+18, "train_loss": 1.9809850438848717, "train_runtime": 1063.0601, "train_samples_per_second": 37.02, "train_steps_per_second": 0.072 } ], "logging_steps": 5, "max_steps": 77, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5573103901178593e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }