{ "best_metric": 0.9734994769096375, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_03/checkpoint-1057", "epoch": 6.0, "eval_steps": 500, "global_step": 6342, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.316096782684326, "learning_rate": 4.865067829457365e-05, "loss": 1.1923, "step": 1057 }, { "epoch": 1.0, "eval_bleu": 0.3758671129852906, "eval_loss": 0.9734994769096375, "eval_rouge1": 0.5620694784691431, "eval_rouge2": 0.3072634138927888, "eval_rougeL": 0.5602967261743523, "eval_runtime": 18.3819, "eval_samples_per_second": 114.95, "eval_steps_per_second": 14.416, "step": 1057 }, { "epoch": 2.0, "grad_norm": 1.9589498043060303, "learning_rate": 4.609011627906977e-05, "loss": 0.3956, "step": 2114 }, { "epoch": 2.0, "eval_bleu": 0.41456228931689637, "eval_loss": 1.2529053688049316, "eval_rouge1": 0.6342121068042533, "eval_rouge2": 0.3777098530906886, "eval_rougeL": 0.6329704343270921, "eval_runtime": 27.6701, "eval_samples_per_second": 76.364, "eval_steps_per_second": 9.577, "step": 2114 }, { "epoch": 3.0, "grad_norm": 1.9037576913833618, "learning_rate": 4.3529554263565894e-05, "loss": 0.2578, "step": 3171 }, { "epoch": 3.0, "eval_bleu": 0.4177567847266788, "eval_loss": 1.1432485580444336, "eval_rouge1": 0.6356354703691816, "eval_rouge2": 0.38873743710479597, "eval_rougeL": 0.6340522473648312, "eval_runtime": 16.0819, "eval_samples_per_second": 131.39, "eval_steps_per_second": 16.478, "step": 3171 }, { "epoch": 4.0, "grad_norm": 3.466954469680786, "learning_rate": 4.096899224806201e-05, "loss": 0.1771, "step": 4228 }, { "epoch": 4.0, "eval_bleu": 0.420916242293927, "eval_loss": 1.3314274549484253, "eval_rouge1": 0.6564326494399506, "eval_rouge2": 0.4082972964966378, "eval_rougeL": 0.6545946336979762, "eval_runtime": 15.9444, "eval_samples_per_second": 132.523, "eval_steps_per_second": 16.62, "step": 4228 }, { "epoch": 5.0, "grad_norm": 2.057302236557007, "learning_rate": 3.840843023255814e-05, "loss": 0.1337, "step": 5285 }, { "epoch": 5.0, "eval_bleu": 0.42563539215296015, "eval_loss": 1.344425916671753, "eval_rouge1": 0.6508055039664861, "eval_rouge2": 0.40613669991922435, "eval_rougeL": 0.6494650833785085, "eval_runtime": 139.4783, "eval_samples_per_second": 15.149, "eval_steps_per_second": 1.9, "step": 5285 }, { "epoch": 6.0, "grad_norm": 1.6741355657577515, "learning_rate": 3.5847868217054265e-05, "loss": 0.1118, "step": 6342 }, { "epoch": 6.0, "eval_bleu": 0.42475546499314026, "eval_loss": 1.369273066520691, "eval_rouge1": 0.6534368445259939, "eval_rouge2": 0.40793487128864964, "eval_rougeL": 0.6523435258107617, "eval_runtime": 22.7821, "eval_samples_per_second": 92.748, "eval_steps_per_second": 11.632, "step": 6342 }, { "epoch": 6.0, "step": 6342, "total_flos": 1.1774067405225984e+16, "train_loss": 0.37803844186042396, "train_runtime": 4074.3615, "train_samples_per_second": 41.489, "train_steps_per_second": 5.189 } ], "logging_steps": 500, "max_steps": 21140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1774067405225984e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }