{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.001295840352468576, "eval_steps": 9, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.183361409874304e-05, "grad_norm": 2.3113319873809814, "learning_rate": 1e-05, "loss": 3.5351, "step": 1 }, { "epoch": 5.183361409874304e-05, "eval_loss": 3.5113468170166016, "eval_runtime": 1769.9186, "eval_samples_per_second": 9.18, "eval_steps_per_second": 1.148, "step": 1 }, { "epoch": 0.00010366722819748608, "grad_norm": 2.229635238647461, "learning_rate": 2e-05, "loss": 3.4005, "step": 2 }, { "epoch": 0.00015550084229622911, "grad_norm": 2.0467476844787598, "learning_rate": 3e-05, "loss": 3.5197, "step": 3 }, { "epoch": 0.00020733445639497215, "grad_norm": 2.512348175048828, "learning_rate": 4e-05, "loss": 3.621, "step": 4 }, { "epoch": 0.0002591680704937152, "grad_norm": 2.532371997833252, "learning_rate": 5e-05, "loss": 3.5631, "step": 5 }, { "epoch": 0.00031100168459245823, "grad_norm": 2.249849319458008, "learning_rate": 6e-05, "loss": 3.404, "step": 6 }, { "epoch": 0.00036283529869120127, "grad_norm": 2.5401272773742676, "learning_rate": 7e-05, "loss": 3.4107, "step": 7 }, { "epoch": 0.0004146689127899443, "grad_norm": 2.23197603225708, "learning_rate": 8e-05, "loss": 3.0562, "step": 8 }, { "epoch": 0.0004665025268886873, "grad_norm": 2.439784526824951, "learning_rate": 9e-05, "loss": 3.0909, "step": 9 }, { "epoch": 0.0004665025268886873, "eval_loss": 2.900395393371582, "eval_runtime": 1774.4876, "eval_samples_per_second": 9.156, "eval_steps_per_second": 1.145, "step": 9 }, { "epoch": 0.0005183361409874304, "grad_norm": 2.121995687484741, "learning_rate": 0.0001, "loss": 2.7377, "step": 10 }, { "epoch": 0.0005701697550861734, "grad_norm": 2.543422222137451, "learning_rate": 9.99695413509548e-05, "loss": 2.8312, "step": 11 }, { "epoch": 0.0006220033691849165, "grad_norm": 3.130173444747925, "learning_rate": 9.987820251299122e-05, "loss": 2.8057, "step": 12 }, { "epoch": 0.0006738369832836595, "grad_norm": 2.596888542175293, "learning_rate": 9.972609476841367e-05, "loss": 2.6877, "step": 13 }, { "epoch": 0.0007256705973824025, "grad_norm": 2.7695186138153076, "learning_rate": 9.951340343707852e-05, "loss": 2.8845, "step": 14 }, { "epoch": 0.0007775042114811456, "grad_norm": 2.208770751953125, "learning_rate": 9.924038765061042e-05, "loss": 2.4707, "step": 15 }, { "epoch": 0.0008293378255798886, "grad_norm": 2.075831174850464, "learning_rate": 9.890738003669029e-05, "loss": 2.6753, "step": 16 }, { "epoch": 0.0008811714396786315, "grad_norm": 3.6710920333862305, "learning_rate": 9.851478631379982e-05, "loss": 2.5295, "step": 17 }, { "epoch": 0.0009330050537773746, "grad_norm": 1.9427752494812012, "learning_rate": 9.806308479691595e-05, "loss": 2.5586, "step": 18 }, { "epoch": 0.0009330050537773746, "eval_loss": 2.5122954845428467, "eval_runtime": 1772.457, "eval_samples_per_second": 9.166, "eval_steps_per_second": 1.146, "step": 18 }, { "epoch": 0.0009848386678761177, "grad_norm": 2.3610899448394775, "learning_rate": 9.755282581475769e-05, "loss": 2.5588, "step": 19 }, { "epoch": 0.0010366722819748608, "grad_norm": 1.9613486528396606, "learning_rate": 9.698463103929542e-05, "loss": 2.5903, "step": 20 }, { "epoch": 0.0010885058960736038, "grad_norm": 1.8334481716156006, "learning_rate": 9.635919272833938e-05, "loss": 2.4418, "step": 21 }, { "epoch": 0.0011403395101723468, "grad_norm": 1.7611281871795654, "learning_rate": 9.567727288213005e-05, "loss": 2.5619, "step": 22 }, { "epoch": 0.0011921731242710899, "grad_norm": 2.285313606262207, "learning_rate": 9.493970231495835e-05, "loss": 2.4165, "step": 23 }, { "epoch": 0.001244006738369833, "grad_norm": 1.6585972309112549, "learning_rate": 9.414737964294636e-05, "loss": 2.4672, "step": 24 }, { "epoch": 0.001295840352468576, "grad_norm": 1.6722080707550049, "learning_rate": 9.330127018922194e-05, "loss": 2.3507, "step": 25 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.85471820890112e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }