{
  "best_metric": 1.0865594148635864,
  "best_model_checkpoint": "./outputs/checkpoint-4100",
  "epoch": 2.9879781420765026,
  "eval_steps": 100,
  "global_step": 4100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 1.765,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 1.631608486175537,
      "eval_runtime": 430.1354,
      "eval_samples_per_second": 14.586,
      "eval_steps_per_second": 1.825,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 1.6077,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 1.5870035886764526,
      "eval_runtime": 417.8578,
      "eval_samples_per_second": 15.015,
      "eval_steps_per_second": 1.879,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 1.5755,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 1.5563029050827026,
      "eval_runtime": 417.8787,
      "eval_samples_per_second": 15.014,
      "eval_steps_per_second": 1.879,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 1.5445,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 1.5300686359405518,
      "eval_runtime": 417.9846,
      "eval_samples_per_second": 15.01,
      "eval_steps_per_second": 1.878,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 1.5114,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 1.5074832439422607,
      "eval_runtime": 417.962,
      "eval_samples_per_second": 15.011,
      "eval_steps_per_second": 1.878,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 1.4946,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 1.4870833158493042,
      "eval_runtime": 417.6568,
      "eval_samples_per_second": 15.022,
      "eval_steps_per_second": 1.88,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 1.472,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 1.4683642387390137,
      "eval_runtime": 417.7114,
      "eval_samples_per_second": 15.02,
      "eval_steps_per_second": 1.879,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 1.4622,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 1.4501450061798096,
      "eval_runtime": 417.7046,
      "eval_samples_per_second": 15.02,
      "eval_steps_per_second": 1.879,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 1.433,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 1.4323909282684326,
      "eval_runtime": 417.8696,
      "eval_samples_per_second": 15.014,
      "eval_steps_per_second": 1.879,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 1.4268,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 1.4162870645523071,
      "eval_runtime": 417.7667,
      "eval_samples_per_second": 15.018,
      "eval_steps_per_second": 1.879,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 1.4125,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 1.4018020629882812,
      "eval_runtime": 417.5926,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.88,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 1.3846,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 1.3857518434524536,
      "eval_runtime": 417.6267,
      "eval_samples_per_second": 15.023,
      "eval_steps_per_second": 1.88,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 1.3933,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 1.3718016147613525,
      "eval_runtime": 417.2876,
      "eval_samples_per_second": 15.035,
      "eval_steps_per_second": 1.881,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 1.3556,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 1.3577570915222168,
      "eval_runtime": 417.5078,
      "eval_samples_per_second": 15.027,
      "eval_steps_per_second": 1.88,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 1.3316,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 1.3446884155273438,
      "eval_runtime": 417.3899,
      "eval_samples_per_second": 15.032,
      "eval_steps_per_second": 1.881,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 1.3207,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 1.3314917087554932,
      "eval_runtime": 417.7717,
      "eval_samples_per_second": 15.018,
      "eval_steps_per_second": 1.879,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 1.3212,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 1.318053960800171,
      "eval_runtime": 417.56,
      "eval_samples_per_second": 15.025,
      "eval_steps_per_second": 1.88,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 1.2967,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 1.3063520193099976,
      "eval_runtime": 417.5913,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.88,
      "step": 1800
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0002,
      "loss": 1.2883,
      "step": 1900
    },
    {
      "epoch": 1.38,
      "eval_loss": 1.2934125661849976,
      "eval_runtime": 417.7511,
      "eval_samples_per_second": 15.019,
      "eval_steps_per_second": 1.879,
      "step": 1900
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.0002,
      "loss": 1.276,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "eval_loss": 1.2826957702636719,
      "eval_runtime": 448.0598,
      "eval_samples_per_second": 14.003,
      "eval_steps_per_second": 1.752,
      "step": 2000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002,
      "loss": 1.2659,
      "step": 2100
    },
    {
      "epoch": 1.53,
      "eval_loss": 1.2710933685302734,
      "eval_runtime": 417.5822,
      "eval_samples_per_second": 15.025,
      "eval_steps_per_second": 1.88,
      "step": 2100
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002,
      "loss": 1.2451,
      "step": 2200
    },
    {
      "epoch": 1.6,
      "eval_loss": 1.259511113166809,
      "eval_runtime": 417.6409,
      "eval_samples_per_second": 15.022,
      "eval_steps_per_second": 1.88,
      "step": 2200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0002,
      "loss": 1.2436,
      "step": 2300
    },
    {
      "epoch": 1.68,
      "eval_loss": 1.2497906684875488,
      "eval_runtime": 417.7302,
      "eval_samples_per_second": 15.019,
      "eval_steps_per_second": 1.879,
      "step": 2300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002,
      "loss": 1.2412,
      "step": 2400
    },
    {
      "epoch": 1.75,
      "eval_loss": 1.2375760078430176,
      "eval_runtime": 417.7184,
      "eval_samples_per_second": 15.02,
      "eval_steps_per_second": 1.879,
      "step": 2400
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0002,
      "loss": 1.2223,
      "step": 2500
    },
    {
      "epoch": 1.82,
      "eval_loss": 1.2266868352890015,
      "eval_runtime": 417.9231,
      "eval_samples_per_second": 15.012,
      "eval_steps_per_second": 1.878,
      "step": 2500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002,
      "loss": 1.2106,
      "step": 2600
    },
    {
      "epoch": 1.89,
      "eval_loss": 1.216168999671936,
      "eval_runtime": 417.7189,
      "eval_samples_per_second": 15.02,
      "eval_steps_per_second": 1.879,
      "step": 2600
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0002,
      "loss": 1.2034,
      "step": 2700
    },
    {
      "epoch": 1.97,
      "eval_loss": 1.2053289413452148,
      "eval_runtime": 417.5961,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.88,
      "step": 2700
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0002,
      "loss": 1.1787,
      "step": 2800
    },
    {
      "epoch": 2.04,
      "eval_loss": 1.1963659524917603,
      "eval_runtime": 417.7494,
      "eval_samples_per_second": 15.019,
      "eval_steps_per_second": 1.879,
      "step": 2800
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.0002,
      "loss": 1.1541,
      "step": 2900
    },
    {
      "epoch": 2.11,
      "eval_loss": 1.1863473653793335,
      "eval_runtime": 417.6096,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.88,
      "step": 2900
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0002,
      "loss": 1.1567,
      "step": 3000
    },
    {
      "epoch": 2.19,
      "eval_loss": 1.17583167552948,
      "eval_runtime": 417.6171,
      "eval_samples_per_second": 15.023,
      "eval_steps_per_second": 1.88,
      "step": 3000
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.0002,
      "loss": 1.1395,
      "step": 3100
    },
    {
      "epoch": 2.26,
      "eval_loss": 1.1677733659744263,
      "eval_runtime": 417.4863,
      "eval_samples_per_second": 15.028,
      "eval_steps_per_second": 1.88,
      "step": 3100
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.0002,
      "loss": 1.1528,
      "step": 3200
    },
    {
      "epoch": 2.33,
      "eval_loss": 1.1819865703582764,
      "eval_runtime": 339.2738,
      "eval_samples_per_second": 18.492,
      "eval_steps_per_second": 2.314,
      "step": 3200
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.0002,
      "loss": 1.142,
      "step": 3300
    },
    {
      "epoch": 2.41,
      "eval_loss": 1.168481469154358,
      "eval_runtime": 345.6314,
      "eval_samples_per_second": 18.152,
      "eval_steps_per_second": 2.271,
      "step": 3300
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0002,
      "loss": 1.1252,
      "step": 3400
    },
    {
      "epoch": 2.48,
      "eval_loss": 1.1566288471221924,
      "eval_runtime": 339.7257,
      "eval_samples_per_second": 18.468,
      "eval_steps_per_second": 2.311,
      "step": 3400
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0002,
      "loss": 1.1227,
      "step": 3500
    },
    {
      "epoch": 2.55,
      "eval_loss": 1.1449061632156372,
      "eval_runtime": 339.6535,
      "eval_samples_per_second": 18.472,
      "eval_steps_per_second": 2.311,
      "step": 3500
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.0002,
      "loss": 1.116,
      "step": 3600
    },
    {
      "epoch": 2.62,
      "eval_loss": 1.1353813409805298,
      "eval_runtime": 340.0732,
      "eval_samples_per_second": 18.449,
      "eval_steps_per_second": 2.308,
      "step": 3600
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.0002,
      "loss": 1.1106,
      "step": 3700
    },
    {
      "epoch": 2.7,
      "eval_loss": 1.1255244016647339,
      "eval_runtime": 341.3582,
      "eval_samples_per_second": 18.38,
      "eval_steps_per_second": 2.3,
      "step": 3700
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.0002,
      "loss": 1.0945,
      "step": 3800
    },
    {
      "epoch": 2.77,
      "eval_loss": 1.116302251815796,
      "eval_runtime": 339.9589,
      "eval_samples_per_second": 18.455,
      "eval_steps_per_second": 2.309,
      "step": 3800
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002,
      "loss": 1.0902,
      "step": 3900
    },
    {
      "epoch": 2.84,
      "eval_loss": 1.1058732271194458,
      "eval_runtime": 339.729,
      "eval_samples_per_second": 18.468,
      "eval_steps_per_second": 2.311,
      "step": 3900
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.0002,
      "loss": 1.0798,
      "step": 4000
    },
    {
      "epoch": 2.92,
      "eval_loss": 1.0949488878250122,
      "eval_runtime": 339.8787,
      "eval_samples_per_second": 18.46,
      "eval_steps_per_second": 2.31,
      "step": 4000
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.0002,
      "loss": 1.0685,
      "step": 4100
    },
    {
      "epoch": 2.99,
      "eval_loss": 1.0865594148635864,
      "eval_runtime": 340.2766,
      "eval_samples_per_second": 18.438,
      "eval_steps_per_second": 2.307,
      "step": 4100
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 1.3292894134103962e+18,
  "trial_name": null,
  "trial_params": null
}