File size: 5,540 Bytes
bc0f964 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
{
"best_metric": 0.0678805559873581,
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_arabiangpt0.3/checkpoint-101256",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 270016,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.5094387531280518,
"learning_rate": 4.753520917958906e-05,
"loss": 0.0845,
"step": 33752
},
{
"epoch": 1.0,
"eval_bleu": 0.1360083183193832,
"eval_loss": 0.0708920955657959,
"eval_rouge1": 0.48412628006639347,
"eval_rouge2": 0.2595444928790475,
"eval_rougeL": 0.4811152550714598,
"eval_runtime": 2076.8563,
"eval_samples_per_second": 16.249,
"eval_steps_per_second": 4.062,
"step": 33752
},
{
"epoch": 2.0,
"grad_norm": 1.2100098133087158,
"learning_rate": 4.503335606487384e-05,
"loss": 0.0638,
"step": 67504
},
{
"epoch": 2.0,
"eval_bleu": 0.15568253204889895,
"eval_loss": 0.06831026077270508,
"eval_rouge1": 0.516649446787459,
"eval_rouge2": 0.2948018415714037,
"eval_rougeL": 0.514024802556861,
"eval_runtime": 2012.7332,
"eval_samples_per_second": 16.766,
"eval_steps_per_second": 4.192,
"step": 67504
},
{
"epoch": 3.0,
"grad_norm": 0.4080255627632141,
"learning_rate": 4.253150295015863e-05,
"loss": 0.0546,
"step": 101256
},
{
"epoch": 3.0,
"eval_bleu": 0.16813566870280167,
"eval_loss": 0.0678805559873581,
"eval_rouge1": 0.5342839149814531,
"eval_rouge2": 0.3157655613133348,
"eval_rougeL": 0.5319180103197795,
"eval_runtime": 1952.412,
"eval_samples_per_second": 17.284,
"eval_steps_per_second": 4.321,
"step": 101256
},
{
"epoch": 4.0,
"grad_norm": 0.40639498829841614,
"learning_rate": 4.002964983544342e-05,
"loss": 0.0468,
"step": 135008
},
{
"epoch": 4.0,
"eval_bleu": 0.18301542857870237,
"eval_loss": 0.06886506825685501,
"eval_rouge1": 0.5476940640774866,
"eval_rouge2": 0.33203989935219014,
"eval_rougeL": 0.5452209555156204,
"eval_runtime": 1954.1613,
"eval_samples_per_second": 17.269,
"eval_steps_per_second": 4.317,
"step": 135008
},
{
"epoch": 5.0,
"grad_norm": 0.5491040945053101,
"learning_rate": 3.7527796720728204e-05,
"loss": 0.0401,
"step": 168760
},
{
"epoch": 5.0,
"eval_bleu": 0.19405846737344298,
"eval_loss": 0.0706464946269989,
"eval_rouge1": 0.5548980705277954,
"eval_rouge2": 0.34218497796474945,
"eval_rougeL": 0.55265645352457,
"eval_runtime": 2015.2556,
"eval_samples_per_second": 16.745,
"eval_steps_per_second": 4.187,
"step": 168760
},
{
"epoch": 6.0,
"grad_norm": 0.2615252435207367,
"learning_rate": 3.502594360601299e-05,
"loss": 0.0345,
"step": 202512
},
{
"epoch": 6.0,
"eval_bleu": 0.20164731422462528,
"eval_loss": 0.07342757284641266,
"eval_rouge1": 0.5579768475101489,
"eval_rouge2": 0.34858517959726787,
"eval_rougeL": 0.5558276150233377,
"eval_runtime": 2028.3934,
"eval_samples_per_second": 16.637,
"eval_steps_per_second": 4.159,
"step": 202512
},
{
"epoch": 7.0,
"grad_norm": 0.24876318871974945,
"learning_rate": 3.252409049129777e-05,
"loss": 0.0302,
"step": 236264
},
{
"epoch": 7.0,
"eval_bleu": 0.2083227447079373,
"eval_loss": 0.0768970176577568,
"eval_rouge1": 0.5600411658774835,
"eval_rouge2": 0.3535281728359022,
"eval_rougeL": 0.5576670140997706,
"eval_runtime": 2016.181,
"eval_samples_per_second": 16.738,
"eval_steps_per_second": 4.185,
"step": 236264
},
{
"epoch": 8.0,
"grad_norm": 0.5295557379722595,
"learning_rate": 3.0022237376582564e-05,
"loss": 0.0269,
"step": 270016
},
{
"epoch": 8.0,
"eval_bleu": 0.21123631944078344,
"eval_loss": 0.07967726141214371,
"eval_rouge1": 0.5591904067243532,
"eval_rouge2": 0.3545914045960048,
"eval_rougeL": 0.5569180306629817,
"eval_runtime": 2017.1957,
"eval_samples_per_second": 16.729,
"eval_steps_per_second": 4.183,
"step": 270016
},
{
"epoch": 8.0,
"step": 270016,
"total_flos": 2.0060826554471547e+18,
"train_loss": 0.04765514560038514,
"train_runtime": 203650.1496,
"train_samples_per_second": 13.259,
"train_steps_per_second": 3.315
}
],
"logging_steps": 500,
"max_steps": 675040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.0060826554471547e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|