{ "best_metric": 0.10316114127635956, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg_aragpt2-base/checkpoint-35525", "epoch": 10.0, "eval_steps": 500, "global_step": 71050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.1684599220752716, "learning_rate": 4.766772598870057e-05, "loss": 0.2542, "step": 7105 }, { "epoch": 1.0, "eval_bleu": 0.07285026466195073, "eval_loss": 0.11986471712589264, "eval_rouge1": 0.31870746501286634, "eval_rouge2": 0.11025143574709506, "eval_rougeL": 0.30940400851788263, "eval_runtime": 445.2495, "eval_samples_per_second": 31.912, "eval_steps_per_second": 3.991, "step": 7105 }, { "epoch": 2.0, "grad_norm": 0.16581259667873383, "learning_rate": 4.515889830508475e-05, "loss": 0.1078, "step": 14210 }, { "epoch": 2.0, "eval_bleu": 0.104385167515318, "eval_loss": 0.1118762344121933, "eval_rouge1": 0.38033922316590574, "eval_rouge2": 0.16357204386475152, "eval_rougeL": 0.37196606997888715, "eval_runtime": 445.4751, "eval_samples_per_second": 31.896, "eval_steps_per_second": 3.989, "step": 14210 }, { "epoch": 3.0, "grad_norm": 0.18644841015338898, "learning_rate": 4.265007062146893e-05, "loss": 0.0972, "step": 21315 }, { "epoch": 3.0, "eval_bleu": 0.12224033944483013, "eval_loss": 0.10767202824354172, "eval_rouge1": 0.4109049665753015, "eval_rouge2": 0.19329040527739555, "eval_rougeL": 0.40326193036349967, "eval_runtime": 383.8172, "eval_samples_per_second": 37.02, "eval_steps_per_second": 4.63, "step": 21315 }, { "epoch": 4.0, "grad_norm": 0.2712990939617157, "learning_rate": 4.014124293785311e-05, "loss": 0.0902, "step": 28420 }, { "epoch": 4.0, "eval_bleu": 0.13124006032752472, "eval_loss": 0.10514508932828903, "eval_rouge1": 0.42940503923153844, "eval_rouge2": 0.2090469696784658, "eval_rougeL": 0.4223466730872162, "eval_runtime": 323.7733, "eval_samples_per_second": 43.886, "eval_steps_per_second": 5.488, "step": 28420 }, { "epoch": 5.0, "grad_norm": 0.16391794383525848, "learning_rate": 3.763241525423729e-05, "loss": 0.0846, "step": 35525 }, { "epoch": 5.0, "eval_bleu": 0.14049162127130865, "eval_loss": 0.10316114127635956, "eval_rouge1": 0.44551198545007975, "eval_rouge2": 0.22506890852974587, "eval_rougeL": 0.4382572142917238, "eval_runtime": 340.0971, "eval_samples_per_second": 41.779, "eval_steps_per_second": 5.225, "step": 35525 }, { "epoch": 6.0, "grad_norm": 0.1612280309200287, "learning_rate": 3.5123587570621466e-05, "loss": 0.0799, "step": 42630 }, { "epoch": 6.0, "eval_bleu": 0.14535221713569074, "eval_loss": 0.10411085933446884, "eval_rouge1": 0.45365603658460285, "eval_rouge2": 0.23383881662198475, "eval_rougeL": 0.4465500235966283, "eval_runtime": 384.6849, "eval_samples_per_second": 36.937, "eval_steps_per_second": 4.619, "step": 42630 }, { "epoch": 7.0, "grad_norm": 0.252353310585022, "learning_rate": 3.261475988700565e-05, "loss": 0.0759, "step": 49735 }, { "epoch": 7.0, "eval_bleu": 0.1493733281238007, "eval_loss": 0.10441984981298447, "eval_rouge1": 0.4622737132167348, "eval_rouge2": 0.24252644756195563, "eval_rougeL": 0.45529247029346154, "eval_runtime": 324.6182, "eval_samples_per_second": 43.771, "eval_steps_per_second": 5.474, "step": 49735 }, { "epoch": 8.0, "grad_norm": 0.21136653423309326, "learning_rate": 3.010593220338983e-05, "loss": 0.0722, "step": 56840 }, { "epoch": 8.0, "eval_bleu": 0.1526924816405305, "eval_loss": 0.10443145781755447, "eval_rouge1": 0.4655122846049303, "eval_rouge2": 0.24695475478185897, "eval_rougeL": 0.45872867266974005, "eval_runtime": 324.8018, "eval_samples_per_second": 43.747, "eval_steps_per_second": 5.471, "step": 56840 }, { "epoch": 9.0, "grad_norm": 0.26080313324928284, "learning_rate": 2.7597104519774014e-05, "loss": 0.069, "step": 63945 }, { "epoch": 9.0, "eval_bleu": 0.153555473627703, "eval_loss": 0.10583677142858505, "eval_rouge1": 0.4688625732021522, "eval_rouge2": 0.24885496454985231, "eval_rougeL": 0.46205637495730445, "eval_runtime": 324.9582, "eval_samples_per_second": 43.726, "eval_steps_per_second": 5.468, "step": 63945 }, { "epoch": 10.0, "grad_norm": 0.27384456992149353, "learning_rate": 2.5088276836158192e-05, "loss": 0.066, "step": 71050 }, { "epoch": 10.0, "eval_bleu": 0.1549549290214687, "eval_loss": 0.10621096938848495, "eval_rouge1": 0.4724064043822283, "eval_rouge2": 0.25225313492301393, "eval_rougeL": 0.46574710538787245, "eval_runtime": 327.3388, "eval_samples_per_second": 43.408, "eval_steps_per_second": 5.429, "step": 71050 }, { "epoch": 10.0, "step": 71050, "total_flos": 2.9701587861504e+17, "train_loss": 0.09969830163342276, "train_runtime": 40778.4516, "train_samples_per_second": 27.876, "train_steps_per_second": 3.485 } ], "logging_steps": 500, "max_steps": 142100, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.9701587861504e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }