{ "best_metric": 0.9282445542742083, "best_model_checkpoint": "output/fine_tuned/t5-base/QNLI/checkpoint-6548", "epoch": 5.0, "eval_steps": 500, "global_step": 8185, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.30543677458766033, "grad_norm": 3.3223073482513428, "learning_rate": 4.6945632254123404e-05, "loss": 0.4673, "step": 500 }, { "epoch": 0.6108735491753207, "grad_norm": 3.269711494445801, "learning_rate": 4.389126450824679e-05, "loss": 0.3224, "step": 1000 }, { "epoch": 0.916310323762981, "grad_norm": 1.466129183769226, "learning_rate": 4.083689676237019e-05, "loss": 0.2856, "step": 1500 }, { "epoch": 1.0, "eval_accuracy": 0.914881933003844, "eval_loss": 0.22163903713226318, "eval_runtime": 10.4314, "eval_samples_per_second": 523.707, "eval_steps_per_second": 8.244, "step": 1637 }, { "epoch": 1.2217470983506413, "grad_norm": 2.509833335876465, "learning_rate": 3.778252901649359e-05, "loss": 0.2448, "step": 2000 }, { "epoch": 1.5271838729383018, "grad_norm": 3.2846474647521973, "learning_rate": 3.472816127061698e-05, "loss": 0.2259, "step": 2500 }, { "epoch": 1.832620647525962, "grad_norm": 2.8232882022857666, "learning_rate": 3.167379352474038e-05, "loss": 0.2258, "step": 3000 }, { "epoch": 2.0, "eval_accuracy": 0.9220208676551346, "eval_loss": 0.20598259568214417, "eval_runtime": 9.5458, "eval_samples_per_second": 572.292, "eval_steps_per_second": 9.009, "step": 3274 }, { "epoch": 2.1380574221136226, "grad_norm": 2.700801372528076, "learning_rate": 2.8619425778863777e-05, "loss": 0.1986, "step": 3500 }, { "epoch": 2.4434941967012827, "grad_norm": 1.884992241859436, "learning_rate": 2.556505803298717e-05, "loss": 0.1777, "step": 4000 }, { "epoch": 2.748930971288943, "grad_norm": 1.391169548034668, "learning_rate": 2.251069028711057e-05, "loss": 0.1791, "step": 4500 }, { "epoch": 3.0, "eval_accuracy": 0.9276954054548783, "eval_loss": 0.20381644368171692, "eval_runtime": 9.2262, "eval_samples_per_second": 592.117, "eval_steps_per_second": 9.321, "step": 4911 }, { "epoch": 3.0543677458766036, "grad_norm": 2.7919557094573975, "learning_rate": 1.9456322541233964e-05, "loss": 0.1724, "step": 5000 }, { "epoch": 3.359804520464264, "grad_norm": 2.4957516193389893, "learning_rate": 1.6401954795357362e-05, "loss": 0.1461, "step": 5500 }, { "epoch": 3.665241295051924, "grad_norm": 3.270733118057251, "learning_rate": 1.3347587049480758e-05, "loss": 0.1472, "step": 6000 }, { "epoch": 3.9706780696395847, "grad_norm": 4.12385368347168, "learning_rate": 1.0293219303604154e-05, "loss": 0.1476, "step": 6500 }, { "epoch": 4.0, "eval_accuracy": 0.9282445542742083, "eval_loss": 0.2214784473180771, "eval_runtime": 9.3481, "eval_samples_per_second": 584.394, "eval_steps_per_second": 9.2, "step": 6548 }, { "epoch": 4.276114844227245, "grad_norm": 3.3767876625061035, "learning_rate": 7.238851557727551e-06, "loss": 0.1305, "step": 7000 }, { "epoch": 4.581551618814905, "grad_norm": 2.924355983734131, "learning_rate": 4.184483811850947e-06, "loss": 0.1278, "step": 7500 }, { "epoch": 4.886988393402565, "grad_norm": 5.803096294403076, "learning_rate": 1.1301160659743434e-06, "loss": 0.1263, "step": 8000 }, { "epoch": 5.0, "eval_accuracy": 0.9278784550613216, "eval_loss": 0.2300529181957245, "eval_runtime": 9.2683, "eval_samples_per_second": 589.431, "eval_steps_per_second": 9.279, "step": 8185 }, { "epoch": 5.0, "step": 8185, "total_flos": 7.99682578684032e+16, "train_loss": 0.2058652666915679, "train_runtime": 2769.836, 
"train_samples_per_second": 189.078, "train_steps_per_second": 2.955 } ], "logging_steps": 500, "max_steps": 8185, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.99682578684032e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }