|
{ |
|
"best_metric": 0.8369131635471003, |
|
"best_model_checkpoint": "/hdd1/mujeen/retrieval_prf/output/labeler_nq.train.v7.0_nq.dev.v7.0_rlnq_title/checkpoint-76000", |
|
"epoch": 0.9999910574558462, |
|
"global_step": 83868, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.904755091334001e-05, |
|
"loss": 0.5261, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.7910401963792575, |
|
"eval_loss": 0.4569447934627533, |
|
"eval_runtime": 61.7181, |
|
"eval_samples_per_second": 211.219, |
|
"eval_steps_per_second": 26.41, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8093671006820245e-05, |
|
"loss": 0.4674, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7984811291807303, |
|
"eval_loss": 0.42789292335510254, |
|
"eval_runtime": 62.0897, |
|
"eval_samples_per_second": 209.954, |
|
"eval_steps_per_second": 26.252, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7140744980206995e-05, |
|
"loss": 0.4453, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.804311138386008, |
|
"eval_loss": 0.4111127257347107, |
|
"eval_runtime": 73.971, |
|
"eval_samples_per_second": 176.231, |
|
"eval_steps_per_second": 22.036, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6186865073687225e-05, |
|
"loss": 0.4247, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.8105247008284749, |
|
"eval_loss": 0.4133109748363495, |
|
"eval_runtime": 67.9092, |
|
"eval_samples_per_second": 191.962, |
|
"eval_steps_per_second": 24.003, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5233462107120716e-05, |
|
"loss": 0.4097, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.8149739183798711, |
|
"eval_loss": 0.411438524723053, |
|
"eval_runtime": 76.7711, |
|
"eval_samples_per_second": 169.803, |
|
"eval_steps_per_second": 21.232, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4280059140554206e-05, |
|
"loss": 0.3915, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.8158944461491255, |
|
"eval_loss": 0.4367925524711609, |
|
"eval_runtime": 73.8165, |
|
"eval_samples_per_second": 176.6, |
|
"eval_steps_per_second": 22.082, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3326179234034435e-05, |
|
"loss": 0.3783, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.8168916845658177, |
|
"eval_loss": 0.43372228741645813, |
|
"eval_runtime": 74.7748, |
|
"eval_samples_per_second": 174.337, |
|
"eval_steps_per_second": 21.799, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2373014737444558e-05, |
|
"loss": 0.365, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.8242559067198527, |
|
"eval_loss": 0.40461644530296326, |
|
"eval_runtime": 77.9179, |
|
"eval_samples_per_second": 167.304, |
|
"eval_steps_per_second": 20.919, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1419373300901418e-05, |
|
"loss": 0.3477, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.828628413623811, |
|
"eval_loss": 0.4262824058532715, |
|
"eval_runtime": 74.7614, |
|
"eval_samples_per_second": 174.368, |
|
"eval_steps_per_second": 21.803, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0465731864358278e-05, |
|
"loss": 0.3341, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.8258668303160479, |
|
"eval_loss": 0.40725135803222656, |
|
"eval_runtime": 77.1145, |
|
"eval_samples_per_second": 169.047, |
|
"eval_steps_per_second": 21.137, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.51209042781514e-06, |
|
"loss": 0.3214, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.8284749923289353, |
|
"eval_loss": 0.4358045756816864, |
|
"eval_runtime": 79.2128, |
|
"eval_samples_per_second": 164.569, |
|
"eval_steps_per_second": 20.577, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.55868746124863e-06, |
|
"loss": 0.3112, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.8285517029763732, |
|
"eval_loss": 0.4215088188648224, |
|
"eval_runtime": 76.3765, |
|
"eval_samples_per_second": 170.681, |
|
"eval_steps_per_second": 21.342, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.6050460247054905e-06, |
|
"loss": 0.2996, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.8277845965019944, |
|
"eval_loss": 0.4198075234889984, |
|
"eval_runtime": 75.7616, |
|
"eval_samples_per_second": 172.066, |
|
"eval_steps_per_second": 21.515, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.651643058138981e-06, |
|
"loss": 0.2863, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.8310831543418227, |
|
"eval_loss": 0.4417212903499603, |
|
"eval_runtime": 80.0618, |
|
"eval_samples_per_second": 162.824, |
|
"eval_steps_per_second": 20.359, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.698240091572471e-06, |
|
"loss": 0.2739, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.8222614298864682, |
|
"eval_loss": 0.5145458579063416, |
|
"eval_runtime": 78.9172, |
|
"eval_samples_per_second": 165.186, |
|
"eval_steps_per_second": 20.655, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.744598655029332e-06, |
|
"loss": 0.2656, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.8265572261429887, |
|
"eval_loss": 0.5052666068077087, |
|
"eval_runtime": 77.2292, |
|
"eval_samples_per_second": 168.796, |
|
"eval_steps_per_second": 21.106, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7911956884628224e-06, |
|
"loss": 0.2558, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.8353022399509051, |
|
"eval_loss": 0.47119611501693726, |
|
"eval_runtime": 79.1339, |
|
"eval_samples_per_second": 164.733, |
|
"eval_steps_per_second": 20.598, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8377927218963137e-06, |
|
"loss": 0.2467, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.8312365756366984, |
|
"eval_loss": 0.5034319162368774, |
|
"eval_runtime": 82.6755, |
|
"eval_samples_per_second": 157.677, |
|
"eval_steps_per_second": 19.716, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8839128153765443e-06, |
|
"loss": 0.2342, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.8369131635471003, |
|
"eval_loss": 0.4584212303161621, |
|
"eval_runtime": 81.5904, |
|
"eval_samples_per_second": 159.774, |
|
"eval_steps_per_second": 19.978, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.305098488100349e-07, |
|
"loss": 0.23, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.8341515802393372, |
|
"eval_loss": 0.49471235275268555, |
|
"eval_runtime": 87.9548, |
|
"eval_samples_per_second": 148.213, |
|
"eval_steps_per_second": 18.532, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 83868, |
|
"total_flos": 7.653345366712497e+17, |
|
"train_loss": 0.3355050985067308, |
|
"train_runtime": 56077.3175, |
|
"train_samples_per_second": 47.859, |
|
"train_steps_per_second": 1.496 |
|
} |
|
], |
|
"max_steps": 83868, |
|
"num_train_epochs": 1, |
|
"total_flos": 7.653345366712497e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|