{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.017190991920233798, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00034381983840467596, "eval_loss": 5.879480838775635, "eval_runtime": 170.3546, "eval_samples_per_second": 28.758, "eval_steps_per_second": 3.598, "step": 1 }, { "epoch": 0.0010314595152140279, "grad_norm": 41.21235275268555, "learning_rate": 3e-05, "loss": 5.2239, "step": 3 }, { "epoch": 0.0017190991920233798, "eval_loss": 1.8665575981140137, "eval_runtime": 171.959, "eval_samples_per_second": 28.489, "eval_steps_per_second": 3.565, "step": 5 }, { "epoch": 0.0020629190304280558, "grad_norm": 10.785271644592285, "learning_rate": 6e-05, "loss": 2.5669, "step": 6 }, { "epoch": 0.0030943785456420837, "grad_norm": 4.260767459869385, "learning_rate": 9e-05, "loss": 1.0414, "step": 9 }, { "epoch": 0.0034381983840467596, "eval_loss": 0.7525524497032166, "eval_runtime": 171.9965, "eval_samples_per_second": 28.483, "eval_steps_per_second": 3.564, "step": 10 }, { "epoch": 0.0041258380608561115, "grad_norm": 4.613126754760742, "learning_rate": 9.938441702975689e-05, "loss": 0.7269, "step": 12 }, { "epoch": 0.005157297576070139, "grad_norm": 2.771132469177246, "learning_rate": 9.619397662556435e-05, "loss": 0.688, "step": 15 }, { "epoch": 0.005157297576070139, "eval_loss": 0.6774925589561462, "eval_runtime": 171.919, "eval_samples_per_second": 28.496, "eval_steps_per_second": 3.566, "step": 15 }, { "epoch": 0.006188757091284167, "grad_norm": 2.5343384742736816, "learning_rate": 9.045084971874738e-05, "loss": 0.6198, "step": 18 }, { "epoch": 0.006876396768093519, "eval_loss": 0.6454147100448608, "eval_runtime": 171.8957, "eval_samples_per_second": 28.5, "eval_steps_per_second": 3.566, "step": 20 }, { "epoch": 0.007220216606498195, "grad_norm": 2.4850144386291504, "learning_rate": 8.247240241650918e-05, "loss": 0.7168, "step": 21 }, { "epoch": 0.008251676121712223, "grad_norm": 2.3500659465789795, "learning_rate": 7.269952498697734e-05, "loss": 0.6841, "step": 24 }, { "epoch": 0.008595495960116899, "eval_loss": 0.6237576007843018, "eval_runtime": 171.9416, "eval_samples_per_second": 28.492, "eval_steps_per_second": 3.565, "step": 25 }, { "epoch": 0.009283135636926251, "grad_norm": 2.28306245803833, "learning_rate": 6.167226819279528e-05, "loss": 0.6412, "step": 27 }, { "epoch": 0.010314595152140279, "grad_norm": 2.4008116722106934, "learning_rate": 5e-05, "loss": 0.6312, "step": 30 }, { "epoch": 0.010314595152140279, "eval_loss": 0.6085503101348877, "eval_runtime": 171.9315, "eval_samples_per_second": 28.494, "eval_steps_per_second": 3.565, "step": 30 }, { "epoch": 0.011346054667354307, "grad_norm": 2.578450918197632, "learning_rate": 3.832773180720475e-05, "loss": 0.615, "step": 33 }, { "epoch": 0.012033694344163659, "eval_loss": 0.5967410802841187, "eval_runtime": 171.9827, "eval_samples_per_second": 28.485, "eval_steps_per_second": 3.564, "step": 35 }, { "epoch": 0.012377514182568335, "grad_norm": 2.038388729095459, "learning_rate": 2.7300475013022663e-05, "loss": 0.5948, "step": 36 }, { "epoch": 0.013408973697782363, "grad_norm": 2.125666379928589, "learning_rate": 1.7527597583490822e-05, "loss": 0.6002, "step": 39 }, { "epoch": 0.013752793536187038, "eval_loss": 0.5893370509147644, "eval_runtime": 171.9189, "eval_samples_per_second": 28.496, "eval_steps_per_second": 3.566, "step": 40 }, { "epoch": 0.01444043321299639, "grad_norm": 2.277637004852295, "learning_rate": 9.549150281252633e-06, "loss": 0.6284, "step": 42 }, { "epoch": 0.015471892728210418, "grad_norm": 2.8201003074645996, "learning_rate": 3.8060233744356633e-06, "loss": 0.5752, "step": 45 }, { "epoch": 0.015471892728210418, "eval_loss": 0.5873016119003296, "eval_runtime": 171.9639, "eval_samples_per_second": 28.489, "eval_steps_per_second": 3.565, "step": 45 }, { "epoch": 0.016503352243424446, "grad_norm": 2.28126859664917, "learning_rate": 6.15582970243117e-07, "loss": 0.5964, "step": 48 }, { "epoch": 0.017190991920233798, "eval_loss": 0.5866773128509521, "eval_runtime": 171.925, "eval_samples_per_second": 28.495, "eval_steps_per_second": 3.566, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.02538779410432e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }