|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 24.0,
  "eval_steps": 500,
  "global_step": 28368,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.42,
      "learning_rate": 9.894247038917091e-06,
      "loss": 3.5932,
      "step": 500
    },
    {
      "epoch": 0.85,
      "learning_rate": 9.78849407783418e-06,
      "loss": 3.336,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.2544538974761963,
      "eval_runtime": 6.101,
      "eval_samples_per_second": 41.961,
      "eval_steps_per_second": 5.245,
      "step": 1182
    },
    {
      "epoch": 1.27,
      "learning_rate": 9.68274111675127e-06,
      "loss": 3.2562,
      "step": 1500
    },
    {
      "epoch": 1.69,
      "learning_rate": 9.57698815566836e-06,
      "loss": 3.1967,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "eval_loss": 3.1576380729675293,
      "eval_runtime": 6.0951,
      "eval_samples_per_second": 42.001,
      "eval_steps_per_second": 5.25,
      "step": 2364
    },
    {
      "epoch": 2.12,
      "learning_rate": 9.47123519458545e-06,
      "loss": 3.1613,
      "step": 2500
    },
    {
      "epoch": 2.54,
      "learning_rate": 9.36548223350254e-06,
      "loss": 3.1268,
      "step": 3000
    },
    {
      "epoch": 2.96,
      "learning_rate": 9.25972927241963e-06,
      "loss": 3.1148,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_loss": 3.1073131561279297,
      "eval_runtime": 6.0936,
      "eval_samples_per_second": 42.011,
      "eval_steps_per_second": 5.251,
      "step": 3546
    },
    {
      "epoch": 3.38,
      "learning_rate": 9.15397631133672e-06,
      "loss": 3.0804,
      "step": 4000
    },
    {
      "epoch": 3.81,
      "learning_rate": 9.048223350253808e-06,
      "loss": 3.0814,
      "step": 4500
    },
    {
      "epoch": 4.0,
      "eval_loss": 3.074193000793457,
      "eval_runtime": 6.0939,
      "eval_samples_per_second": 42.009,
      "eval_steps_per_second": 5.251,
      "step": 4728
    },
    {
      "epoch": 4.23,
      "learning_rate": 8.942470389170898e-06,
      "loss": 3.054,
      "step": 5000
    },
    {
      "epoch": 4.65,
      "learning_rate": 8.836717428087988e-06,
      "loss": 3.0355,
      "step": 5500
    },
    {
      "epoch": 5.0,
      "eval_loss": 3.04995059967041,
      "eval_runtime": 6.09,
      "eval_samples_per_second": 42.036,
      "eval_steps_per_second": 5.254,
      "step": 5910
    },
    {
      "epoch": 5.08,
      "learning_rate": 8.730964467005076e-06,
      "loss": 3.0365,
      "step": 6000
    },
    {
      "epoch": 5.5,
      "learning_rate": 8.625211505922166e-06,
      "loss": 3.0104,
      "step": 6500
    },
    {
      "epoch": 5.92,
      "learning_rate": 8.519458544839256e-06,
      "loss": 3.0126,
      "step": 7000
    },
    {
      "epoch": 6.0,
      "eval_loss": 3.0317230224609375,
      "eval_runtime": 6.0899,
      "eval_samples_per_second": 42.037,
      "eval_steps_per_second": 5.255,
      "step": 7092
    },
    {
      "epoch": 6.35,
      "learning_rate": 8.413705583756346e-06,
      "loss": 2.9923,
      "step": 7500
    },
    {
      "epoch": 6.77,
      "learning_rate": 8.307952622673435e-06,
      "loss": 2.9902,
      "step": 8000
    },
    {
      "epoch": 7.0,
      "eval_loss": 3.0167293548583984,
      "eval_runtime": 6.0906,
      "eval_samples_per_second": 42.032,
      "eval_steps_per_second": 5.254,
      "step": 8274
    },
    {
      "epoch": 7.19,
      "learning_rate": 8.202199661590525e-06,
      "loss": 2.9783,
      "step": 8500
    },
    {
      "epoch": 7.61,
      "learning_rate": 8.096446700507615e-06,
      "loss": 2.9722,
      "step": 9000
    },
    {
      "epoch": 8.0,
      "eval_loss": 3.004361391067505,
      "eval_runtime": 6.0481,
      "eval_samples_per_second": 42.327,
      "eval_steps_per_second": 5.291,
      "step": 9456
    },
    {
      "epoch": 8.04,
      "learning_rate": 7.990693739424705e-06,
      "loss": 2.9628,
      "step": 9500
    },
    {
      "epoch": 8.46,
      "learning_rate": 7.884940778341795e-06,
      "loss": 2.9593,
      "step": 10000
    },
    {
      "epoch": 8.88,
      "learning_rate": 7.779187817258885e-06,
      "loss": 2.9485,
      "step": 10500
    },
    {
      "epoch": 9.0,
      "eval_loss": 2.9940547943115234,
      "eval_runtime": 6.1451,
      "eval_samples_per_second": 41.659,
      "eval_steps_per_second": 5.207,
      "step": 10638
    },
    {
      "epoch": 9.31,
      "learning_rate": 7.673434856175973e-06,
      "loss": 2.9405,
      "step": 11000
    },
    {
      "epoch": 9.73,
      "learning_rate": 7.567681895093063e-06,
      "loss": 2.943,
      "step": 11500
    },
    {
      "epoch": 10.0,
      "eval_loss": 2.9857802391052246,
      "eval_runtime": 6.2619,
      "eval_samples_per_second": 40.882,
      "eval_steps_per_second": 5.11,
      "step": 11820
    },
    {
      "epoch": 10.15,
      "learning_rate": 7.461928934010153e-06,
      "loss": 2.9243,
      "step": 12000
    },
    {
      "epoch": 10.58,
      "learning_rate": 7.356175972927243e-06,
      "loss": 2.9228,
      "step": 12500
    },
    {
      "epoch": 11.0,
      "learning_rate": 7.2504230118443316e-06,
      "loss": 2.9216,
      "step": 13000
    },
    {
      "epoch": 11.0,
      "eval_loss": 2.9776651859283447,
      "eval_runtime": 6.2854,
      "eval_samples_per_second": 40.73,
      "eval_steps_per_second": 5.091,
      "step": 13002
    },
    {
      "epoch": 11.42,
      "learning_rate": 7.144670050761422e-06,
      "loss": 2.9118,
      "step": 13500
    },
    {
      "epoch": 11.84,
      "learning_rate": 7.038917089678512e-06,
      "loss": 2.911,
      "step": 14000
    },
    {
      "epoch": 12.0,
      "eval_loss": 2.9713006019592285,
      "eval_runtime": 6.1107,
      "eval_samples_per_second": 41.894,
      "eval_steps_per_second": 5.237,
      "step": 14184
    },
    {
      "epoch": 12.27,
      "learning_rate": 6.933164128595601e-06,
      "loss": 2.9038,
      "step": 14500
    },
    {
      "epoch": 12.69,
      "learning_rate": 6.827411167512691e-06,
      "loss": 2.8924,
      "step": 15000
    },
    {
      "epoch": 13.0,
      "eval_loss": 2.9653375148773193,
      "eval_runtime": 6.1162,
      "eval_samples_per_second": 41.856,
      "eval_steps_per_second": 5.232,
      "step": 15366
    },
    {
      "epoch": 13.11,
      "learning_rate": 6.721658206429781e-06,
      "loss": 2.9025,
      "step": 15500
    },
    {
      "epoch": 13.54,
      "learning_rate": 6.61590524534687e-06,
      "loss": 2.8886,
      "step": 16000
    },
    {
      "epoch": 13.96,
      "learning_rate": 6.51015228426396e-06,
      "loss": 2.8882,
      "step": 16500
    },
    {
      "epoch": 14.0,
      "eval_loss": 2.960761547088623,
      "eval_runtime": 6.1211,
      "eval_samples_per_second": 41.822,
      "eval_steps_per_second": 5.228,
      "step": 16548
    },
    {
      "epoch": 14.38,
      "learning_rate": 6.40439932318105e-06,
      "loss": 2.8777,
      "step": 17000
    },
    {
      "epoch": 14.81,
      "learning_rate": 6.298646362098139e-06,
      "loss": 2.8826,
      "step": 17500
    },
    {
      "epoch": 15.0,
      "eval_loss": 2.9559221267700195,
      "eval_runtime": 6.0998,
      "eval_samples_per_second": 41.969,
      "eval_steps_per_second": 5.246,
      "step": 17730
    },
    {
      "epoch": 15.23,
      "learning_rate": 6.1928934010152285e-06,
      "loss": 2.8796,
      "step": 18000
    },
    {
      "epoch": 15.65,
      "learning_rate": 6.0871404399323185e-06,
      "loss": 2.8697,
      "step": 18500
    },
    {
      "epoch": 16.0,
      "eval_loss": 2.952040672302246,
      "eval_runtime": 6.2485,
      "eval_samples_per_second": 40.97,
      "eval_steps_per_second": 5.121,
      "step": 18912
    },
    {
      "epoch": 16.07,
      "learning_rate": 5.981387478849409e-06,
      "loss": 2.8645,
      "step": 19000
    },
    {
      "epoch": 16.5,
      "learning_rate": 5.875634517766498e-06,
      "loss": 2.8678,
      "step": 19500
    },
    {
      "epoch": 16.92,
      "learning_rate": 5.769881556683588e-06,
      "loss": 2.8616,
      "step": 20000
    },
    {
      "epoch": 17.0,
      "eval_loss": 2.948793888092041,
      "eval_runtime": 6.2711,
      "eval_samples_per_second": 40.822,
      "eval_steps_per_second": 5.103,
      "step": 20094
    },
    {
      "epoch": 17.34,
      "learning_rate": 5.664128595600678e-06,
      "loss": 2.8548,
      "step": 20500
    },
    {
      "epoch": 17.77,
      "learning_rate": 5.558375634517766e-06,
      "loss": 2.8529,
      "step": 21000
    },
    {
      "epoch": 18.0,
      "eval_loss": 2.945361614227295,
      "eval_runtime": 6.3517,
      "eval_samples_per_second": 40.304,
      "eval_steps_per_second": 5.038,
      "step": 21276
    },
    {
      "epoch": 18.19,
      "learning_rate": 5.452622673434856e-06,
      "loss": 2.8557,
      "step": 21500
    },
    {
      "epoch": 18.61,
      "learning_rate": 5.346869712351946e-06,
      "loss": 2.8448,
      "step": 22000
    },
    {
      "epoch": 19.0,
      "eval_loss": 2.9428470134735107,
      "eval_runtime": 6.2219,
      "eval_samples_per_second": 41.145,
      "eval_steps_per_second": 5.143,
      "step": 22458
    },
    {
      "epoch": 19.04,
      "learning_rate": 5.241116751269036e-06,
      "loss": 2.8458,
      "step": 22500
    },
    {
      "epoch": 19.46,
      "learning_rate": 5.1353637901861255e-06,
      "loss": 2.8462,
      "step": 23000
    },
    {
      "epoch": 19.88,
      "learning_rate": 5.0296108291032155e-06,
      "loss": 2.84,
      "step": 23500
    },
    {
      "epoch": 20.0,
      "eval_loss": 2.940398693084717,
      "eval_runtime": 6.2496,
      "eval_samples_per_second": 40.962,
      "eval_steps_per_second": 5.12,
      "step": 23640
    },
    {
      "epoch": 20.3,
      "learning_rate": 4.923857868020305e-06,
      "loss": 2.8349,
      "step": 24000
    },
    {
      "epoch": 20.73,
      "learning_rate": 4.818104906937395e-06,
      "loss": 2.8285,
      "step": 24500
    },
    {
      "epoch": 21.0,
      "eval_loss": 2.938441276550293,
      "eval_runtime": 6.1601,
      "eval_samples_per_second": 41.558,
      "eval_steps_per_second": 5.195,
      "step": 24822
    },
    {
      "epoch": 21.15,
      "learning_rate": 4.712351945854484e-06,
      "loss": 2.8345,
      "step": 25000
    },
    {
      "epoch": 21.57,
      "learning_rate": 4.606598984771574e-06,
      "loss": 2.8302,
      "step": 25500
    },
    {
      "epoch": 22.0,
      "learning_rate": 4.500846023688664e-06,
      "loss": 2.8266,
      "step": 26000
    },
    {
      "epoch": 22.0,
      "eval_loss": 2.9362807273864746,
      "eval_runtime": 6.0955,
      "eval_samples_per_second": 41.998,
      "eval_steps_per_second": 5.25,
      "step": 26004
    },
    {
      "epoch": 22.42,
      "learning_rate": 4.395093062605753e-06,
      "loss": 2.819,
      "step": 26500
    },
    {
      "epoch": 22.84,
      "learning_rate": 4.289340101522843e-06,
      "loss": 2.8232,
      "step": 27000
    },
    {
      "epoch": 23.0,
      "eval_loss": 2.934544324874878,
      "eval_runtime": 6.1597,
      "eval_samples_per_second": 41.561,
      "eval_steps_per_second": 5.195,
      "step": 27186
    },
    {
      "epoch": 23.27,
      "learning_rate": 4.183587140439932e-06,
      "loss": 2.8213,
      "step": 27500
    },
    {
      "epoch": 23.69,
      "learning_rate": 4.0778341793570224e-06,
      "loss": 2.8136,
      "step": 28000
    }
  ],
  "logging_steps": 500,
  "max_steps": 47280,
  "num_train_epochs": 40,
  "save_steps": 9456,
  "total_flos": 4.4459884412928e+16,
  "trial_name": null,
  "trial_params": null
}
|
|