|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.6107081324472627, |
|
"global_step": 445000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.996983692635866e-05, |
|
"loss": 7.2434, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9939673852717336e-05, |
|
"loss": 6.3951, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 6.350970268249512, |
|
"eval_runtime": 3.8385, |
|
"eval_samples_per_second": 260.78, |
|
"eval_steps_per_second": 16.413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9909510779075997e-05, |
|
"loss": 6.2791, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9879347705434663e-05, |
|
"loss": 6.2011, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 6.223337173461914, |
|
"eval_runtime": 3.7358, |
|
"eval_samples_per_second": 267.946, |
|
"eval_steps_per_second": 16.864, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.984918463179333e-05, |
|
"loss": 6.15, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9819021558152e-05, |
|
"loss": 6.121, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 6.134737014770508, |
|
"eval_runtime": 3.7442, |
|
"eval_samples_per_second": 267.346, |
|
"eval_steps_per_second": 16.826, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.978885848451066e-05, |
|
"loss": 6.0839, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9758695410869325e-05, |
|
"loss": 6.0626, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 6.063777923583984, |
|
"eval_runtime": 4.0904, |
|
"eval_samples_per_second": 244.72, |
|
"eval_steps_per_second": 15.402, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.972853233722799e-05, |
|
"loss": 6.0263, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.969836926358666e-05, |
|
"loss": 6.0177, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 6.02394962310791, |
|
"eval_runtime": 3.9573, |
|
"eval_samples_per_second": 252.948, |
|
"eval_steps_per_second": 15.92, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.966820618994532e-05, |
|
"loss": 5.9988, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.963804311630399e-05, |
|
"loss": 5.9921, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 6.0168046951293945, |
|
"eval_runtime": 4.0579, |
|
"eval_samples_per_second": 246.677, |
|
"eval_steps_per_second": 15.525, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9607940368809935e-05, |
|
"loss": 5.9751, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9577837621315885e-05, |
|
"loss": 5.9632, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 5.997592926025391, |
|
"eval_runtime": 4.2019, |
|
"eval_samples_per_second": 238.224, |
|
"eval_steps_per_second": 14.993, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.954767454767455e-05, |
|
"loss": 5.9386, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.951751147403322e-05, |
|
"loss": 5.9524, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 5.969239711761475, |
|
"eval_runtime": 4.0231, |
|
"eval_samples_per_second": 248.812, |
|
"eval_steps_per_second": 15.659, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.948734840039188e-05, |
|
"loss": 5.9376, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9457185326750546e-05, |
|
"loss": 5.9328, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 5.940969944000244, |
|
"eval_runtime": 3.8844, |
|
"eval_samples_per_second": 257.697, |
|
"eval_steps_per_second": 16.219, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.942702225310921e-05, |
|
"loss": 5.9177, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.939685917946787e-05, |
|
"loss": 5.9162, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 5.932608604431152, |
|
"eval_runtime": 4.0642, |
|
"eval_samples_per_second": 246.298, |
|
"eval_steps_per_second": 15.501, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.936669610582654e-05, |
|
"loss": 5.9054, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.933659335833249e-05, |
|
"loss": 5.8906, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 5.921965599060059, |
|
"eval_runtime": 4.2691, |
|
"eval_samples_per_second": 234.477, |
|
"eval_steps_per_second": 14.757, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9306430284691156e-05, |
|
"loss": 5.8952, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9276267211049823e-05, |
|
"loss": 5.8868, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 5.910747528076172, |
|
"eval_runtime": 4.3284, |
|
"eval_samples_per_second": 231.264, |
|
"eval_steps_per_second": 14.555, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.924610413740849e-05, |
|
"loss": 5.8829, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.921600138991443e-05, |
|
"loss": 5.8738, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 5.895788669586182, |
|
"eval_runtime": 4.2991, |
|
"eval_samples_per_second": 232.842, |
|
"eval_steps_per_second": 14.654, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.91858383162731e-05, |
|
"loss": 5.8717, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.915567524263177e-05, |
|
"loss": 5.8642, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 5.891711235046387, |
|
"eval_runtime": 4.3228, |
|
"eval_samples_per_second": 231.564, |
|
"eval_steps_per_second": 14.574, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9125512168990434e-05, |
|
"loss": 5.8667, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.909540942149638e-05, |
|
"loss": 5.8494, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 5.886107921600342, |
|
"eval_runtime": 4.4064, |
|
"eval_samples_per_second": 227.167, |
|
"eval_steps_per_second": 14.297, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.906524634785505e-05, |
|
"loss": 5.8369, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.903508327421371e-05, |
|
"loss": 5.8414, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 5.89373254776001, |
|
"eval_runtime": 4.3188, |
|
"eval_samples_per_second": 231.776, |
|
"eval_steps_per_second": 14.587, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.900492020057238e-05, |
|
"loss": 5.851, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8974757126931045e-05, |
|
"loss": 5.8512, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 5.9001264572143555, |
|
"eval_runtime": 4.3173, |
|
"eval_samples_per_second": 231.857, |
|
"eval_steps_per_second": 14.592, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8944594053289705e-05, |
|
"loss": 5.8471, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8914491305795654e-05, |
|
"loss": 5.8319, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 5.870309829711914, |
|
"eval_runtime": 4.2282, |
|
"eval_samples_per_second": 236.743, |
|
"eval_steps_per_second": 14.9, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.888432823215432e-05, |
|
"loss": 5.8285, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.885416515851299e-05, |
|
"loss": 5.8282, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 5.834588050842285, |
|
"eval_runtime": 4.2534, |
|
"eval_samples_per_second": 235.344, |
|
"eval_steps_per_second": 14.812, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.882400208487165e-05, |
|
"loss": 5.8305, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.879383901123032e-05, |
|
"loss": 5.8159, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 5.825201034545898, |
|
"eval_runtime": 4.2051, |
|
"eval_samples_per_second": 238.046, |
|
"eval_steps_per_second": 14.982, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.876367593758898e-05, |
|
"loss": 5.8109, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.873357319009493e-05, |
|
"loss": 5.8206, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 5.839576244354248, |
|
"eval_runtime": 4.2748, |
|
"eval_samples_per_second": 234.162, |
|
"eval_steps_per_second": 14.737, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.87034101164536e-05, |
|
"loss": 5.8187, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8673247042812266e-05, |
|
"loss": 5.826, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 5.854990482330322, |
|
"eval_runtime": 6.3825, |
|
"eval_samples_per_second": 156.835, |
|
"eval_steps_per_second": 9.871, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8643083969170926e-05, |
|
"loss": 5.8221, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.861292089552959e-05, |
|
"loss": 5.8109, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 5.85267448425293, |
|
"eval_runtime": 4.2815, |
|
"eval_samples_per_second": 233.795, |
|
"eval_steps_per_second": 14.714, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.858275782188826e-05, |
|
"loss": 5.8124, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.855259474824693e-05, |
|
"loss": 5.8002, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 5.857957363128662, |
|
"eval_runtime": 4.2472, |
|
"eval_samples_per_second": 235.683, |
|
"eval_steps_per_second": 14.833, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.852243167460559e-05, |
|
"loss": 5.7933, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.849232892711154e-05, |
|
"loss": 5.7957, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 5.843862533569336, |
|
"eval_runtime": 4.2251, |
|
"eval_samples_per_second": 236.92, |
|
"eval_steps_per_second": 14.911, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8462165853470204e-05, |
|
"loss": 5.7919, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8432002779828864e-05, |
|
"loss": 5.8001, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 5.8175740242004395, |
|
"eval_runtime": 4.5898, |
|
"eval_samples_per_second": 218.094, |
|
"eval_steps_per_second": 13.726, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.840183970618754e-05, |
|
"loss": 5.7874, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.837173695869348e-05, |
|
"loss": 5.7836, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 5.7963480949401855, |
|
"eval_runtime": 4.2794, |
|
"eval_samples_per_second": 233.909, |
|
"eval_steps_per_second": 14.722, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.834157388505215e-05, |
|
"loss": 5.7915, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8311410811410814e-05, |
|
"loss": 5.7797, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 5.796699047088623, |
|
"eval_runtime": 4.2933, |
|
"eval_samples_per_second": 233.155, |
|
"eval_steps_per_second": 14.674, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.828124773776948e-05, |
|
"loss": 5.7768, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8251144990275424e-05, |
|
"loss": 5.7876, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 5.799530506134033, |
|
"eval_runtime": 4.2207, |
|
"eval_samples_per_second": 237.163, |
|
"eval_steps_per_second": 14.926, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.82209819166341e-05, |
|
"loss": 5.7904, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.819081884299276e-05, |
|
"loss": 5.7837, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 5.798836708068848, |
|
"eval_runtime": 4.2831, |
|
"eval_samples_per_second": 233.71, |
|
"eval_steps_per_second": 14.709, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8160655769351425e-05, |
|
"loss": 5.778, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.813049269571009e-05, |
|
"loss": 5.7681, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 5.811764240264893, |
|
"eval_runtime": 4.3287, |
|
"eval_samples_per_second": 231.246, |
|
"eval_steps_per_second": 14.554, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.810032962206876e-05, |
|
"loss": 5.7695, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.80702268745747e-05, |
|
"loss": 5.7698, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 5.799587726593018, |
|
"eval_runtime": 6.2437, |
|
"eval_samples_per_second": 160.322, |
|
"eval_steps_per_second": 10.09, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.804006380093337e-05, |
|
"loss": 5.7613, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8009900727292036e-05, |
|
"loss": 5.7875, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 5.817694664001465, |
|
"eval_runtime": 4.2624, |
|
"eval_samples_per_second": 234.843, |
|
"eval_steps_per_second": 14.78, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7979737653650696e-05, |
|
"loss": 5.7868, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.794957458000936e-05, |
|
"loss": 5.7672, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 5.817095756530762, |
|
"eval_runtime": 4.3001, |
|
"eval_samples_per_second": 232.788, |
|
"eval_steps_per_second": 14.651, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.791947183251531e-05, |
|
"loss": 5.7585, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.788930875887398e-05, |
|
"loss": 5.77, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 5.829194068908691, |
|
"eval_runtime": 4.4257, |
|
"eval_samples_per_second": 226.177, |
|
"eval_steps_per_second": 14.235, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.785914568523264e-05, |
|
"loss": 5.7525, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.782898261159131e-05, |
|
"loss": 5.7534, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 5.81154203414917, |
|
"eval_runtime": 4.3328, |
|
"eval_samples_per_second": 231.028, |
|
"eval_steps_per_second": 14.54, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7798819537949973e-05, |
|
"loss": 5.7679, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.776871679045592e-05, |
|
"loss": 5.7461, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 5.76007604598999, |
|
"eval_runtime": 4.2778, |
|
"eval_samples_per_second": 233.997, |
|
"eval_steps_per_second": 14.727, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.773855371681459e-05, |
|
"loss": 5.7546, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.770839064317326e-05, |
|
"loss": 5.7542, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 5.81342077255249, |
|
"eval_runtime": 4.3112, |
|
"eval_samples_per_second": 232.186, |
|
"eval_steps_per_second": 14.613, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.767822756953192e-05, |
|
"loss": 5.7467, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.764806449589059e-05, |
|
"loss": 5.7487, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 5.810147762298584, |
|
"eval_runtime": 4.2762, |
|
"eval_samples_per_second": 234.084, |
|
"eval_steps_per_second": 14.733, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7617961748396534e-05, |
|
"loss": 5.7516, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7587798674755194e-05, |
|
"loss": 5.7464, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 5.790091514587402, |
|
"eval_runtime": 6.3564, |
|
"eval_samples_per_second": 157.479, |
|
"eval_steps_per_second": 9.911, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.755763560111387e-05, |
|
"loss": 5.7518, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.752747252747253e-05, |
|
"loss": 5.7573, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 5.804274082183838, |
|
"eval_runtime": 4.355, |
|
"eval_samples_per_second": 229.852, |
|
"eval_steps_per_second": 14.466, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7497309453831195e-05, |
|
"loss": 5.7512, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.746714638018986e-05, |
|
"loss": 5.7431, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 5.761765480041504, |
|
"eval_runtime": 4.3203, |
|
"eval_samples_per_second": 231.698, |
|
"eval_steps_per_second": 14.582, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.743704363269581e-05, |
|
"loss": 5.7573, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.740688055905447e-05, |
|
"loss": 5.7202, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 5.7786478996276855, |
|
"eval_runtime": 4.3352, |
|
"eval_samples_per_second": 230.9, |
|
"eval_steps_per_second": 14.532, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.737671748541314e-05, |
|
"loss": 5.7353, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7346554411771805e-05, |
|
"loss": 5.7476, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 5.780131816864014, |
|
"eval_runtime": 4.4034, |
|
"eval_samples_per_second": 227.326, |
|
"eval_steps_per_second": 14.307, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.731639133813047e-05, |
|
"loss": 5.7345, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7286288590636415e-05, |
|
"loss": 5.7416, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 5.7368245124816895, |
|
"eval_runtime": 4.2102, |
|
"eval_samples_per_second": 237.757, |
|
"eval_steps_per_second": 14.964, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.725612551699509e-05, |
|
"loss": 5.7451, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.722596244335375e-05, |
|
"loss": 5.7401, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 5.775325298309326, |
|
"eval_runtime": 4.2649, |
|
"eval_samples_per_second": 234.709, |
|
"eval_steps_per_second": 14.772, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7195799369712416e-05, |
|
"loss": 5.7308, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.716563629607108e-05, |
|
"loss": 5.7401, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 5.7712297439575195, |
|
"eval_runtime": 4.3345, |
|
"eval_samples_per_second": 230.935, |
|
"eval_steps_per_second": 14.534, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7135533548577026e-05, |
|
"loss": 5.7317, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.710537047493569e-05, |
|
"loss": 5.7336, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 5.742641925811768, |
|
"eval_runtime": 4.2969, |
|
"eval_samples_per_second": 232.958, |
|
"eval_steps_per_second": 14.662, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.707520740129436e-05, |
|
"loss": 5.7449, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.704510465380031e-05, |
|
"loss": 5.7297, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 5.769224643707275, |
|
"eval_runtime": 4.3211, |
|
"eval_samples_per_second": 231.651, |
|
"eval_steps_per_second": 14.579, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.701494158015897e-05, |
|
"loss": 5.7413, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.698477850651764e-05, |
|
"loss": 5.7347, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 5.765679359436035, |
|
"eval_runtime": 4.2691, |
|
"eval_samples_per_second": 234.477, |
|
"eval_steps_per_second": 14.757, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.69546154328763e-05, |
|
"loss": 5.7256, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.692445235923497e-05, |
|
"loss": 5.7183, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 5.7880048751831055, |
|
"eval_runtime": 8.1705, |
|
"eval_samples_per_second": 122.514, |
|
"eval_steps_per_second": 7.711, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.689428928559364e-05, |
|
"loss": 5.7377, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6864126211952304e-05, |
|
"loss": 5.7239, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 5.784894943237305, |
|
"eval_runtime": 4.3979, |
|
"eval_samples_per_second": 227.609, |
|
"eval_steps_per_second": 14.325, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6833963138310964e-05, |
|
"loss": 5.7332, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.680380006466963e-05, |
|
"loss": 5.7316, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 5.74093770980835, |
|
"eval_runtime": 4.3725, |
|
"eval_samples_per_second": 228.931, |
|
"eval_steps_per_second": 14.408, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.67736369910283e-05, |
|
"loss": 5.7227, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6743473917386965e-05, |
|
"loss": 5.7202, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 5.766174793243408, |
|
"eval_runtime": 4.2835, |
|
"eval_samples_per_second": 233.689, |
|
"eval_steps_per_second": 14.708, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.671337116989291e-05, |
|
"loss": 5.7194, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.668326842239886e-05, |
|
"loss": 5.7073, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 5.740816593170166, |
|
"eval_runtime": 4.2663, |
|
"eval_samples_per_second": 234.632, |
|
"eval_steps_per_second": 14.767, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6653105348757525e-05, |
|
"loss": 5.7176, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.662294227511619e-05, |
|
"loss": 5.7113, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 5.764217376708984, |
|
"eval_runtime": 4.4652, |
|
"eval_samples_per_second": 224.178, |
|
"eval_steps_per_second": 14.109, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.659277920147486e-05, |
|
"loss": 5.7094, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.656261612783352e-05, |
|
"loss": 5.722, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 5.7554473876953125, |
|
"eval_runtime": 4.3539, |
|
"eval_samples_per_second": 229.907, |
|
"eval_steps_per_second": 14.47, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6532453054192186e-05, |
|
"loss": 5.7145, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.650228998055085e-05, |
|
"loss": 5.7077, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 5.74335241317749, |
|
"eval_runtime": 4.3356, |
|
"eval_samples_per_second": 230.882, |
|
"eval_steps_per_second": 14.531, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.64721872330568e-05, |
|
"loss": 5.7123, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.644202415941546e-05, |
|
"loss": 5.7163, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 5.77748441696167, |
|
"eval_runtime": 4.4579, |
|
"eval_samples_per_second": 224.543, |
|
"eval_steps_per_second": 14.132, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6411861085774136e-05, |
|
"loss": 5.6978, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6381698012132796e-05, |
|
"loss": 5.7185, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 5.755308151245117, |
|
"eval_runtime": 4.4657, |
|
"eval_samples_per_second": 224.155, |
|
"eval_steps_per_second": 14.108, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.635153493849146e-05, |
|
"loss": 5.7133, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.632137186485013e-05, |
|
"loss": 5.7126, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 5.742791175842285, |
|
"eval_runtime": 4.4874, |
|
"eval_samples_per_second": 223.069, |
|
"eval_steps_per_second": 14.039, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.62912087912088e-05, |
|
"loss": 5.7105, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.626104571756746e-05, |
|
"loss": 5.6955, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 5.750290870666504, |
|
"eval_runtime": 4.4288, |
|
"eval_samples_per_second": 226.023, |
|
"eval_steps_per_second": 14.225, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6230882643926124e-05, |
|
"loss": 5.7056, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6200779896432074e-05, |
|
"loss": 5.7177, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 5.7632975578308105, |
|
"eval_runtime": 4.4407, |
|
"eval_samples_per_second": 225.416, |
|
"eval_steps_per_second": 14.187, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6170616822790734e-05, |
|
"loss": 5.6985, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.61404537491494e-05, |
|
"loss": 5.7103, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 5.735149383544922, |
|
"eval_runtime": 4.3259, |
|
"eval_samples_per_second": 231.399, |
|
"eval_steps_per_second": 14.564, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.611029067550807e-05, |
|
"loss": 5.7056, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6080127601866735e-05, |
|
"loss": 5.7036, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 5.733882427215576, |
|
"eval_runtime": 4.2423, |
|
"eval_samples_per_second": 235.959, |
|
"eval_steps_per_second": 14.851, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.605002485437268e-05, |
|
"loss": 5.7037, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.601986178073135e-05, |
|
"loss": 5.7035, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 5.746898651123047, |
|
"eval_runtime": 4.2969, |
|
"eval_samples_per_second": 232.956, |
|
"eval_steps_per_second": 14.662, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.598969870709001e-05, |
|
"loss": 5.6979, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.595953563344868e-05, |
|
"loss": 5.6953, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 5.743001461029053, |
|
"eval_runtime": 6.5732, |
|
"eval_samples_per_second": 152.285, |
|
"eval_steps_per_second": 9.584, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.592943288595463e-05, |
|
"loss": 5.7201, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5899269812313295e-05, |
|
"loss": 5.704, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 5.756634712219238, |
|
"eval_runtime": 4.4285, |
|
"eval_samples_per_second": 226.037, |
|
"eval_steps_per_second": 14.226, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5869106738671955e-05, |
|
"loss": 5.7095, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.583894366503063e-05, |
|
"loss": 5.712, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 5.764980316162109, |
|
"eval_runtime": 4.3235, |
|
"eval_samples_per_second": 231.523, |
|
"eval_steps_per_second": 14.571, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.580884091753657e-05, |
|
"loss": 5.6976, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.577867784389523e-05, |
|
"loss": 5.7046, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 5.744942665100098, |
|
"eval_runtime": 4.3426, |
|
"eval_samples_per_second": 230.505, |
|
"eval_steps_per_second": 14.507, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5748514770253906e-05, |
|
"loss": 5.7025, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5718351696612566e-05, |
|
"loss": 5.7031, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 5.732196807861328, |
|
"eval_runtime": 4.4018, |
|
"eval_samples_per_second": 227.407, |
|
"eval_steps_per_second": 14.312, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.568818862297123e-05, |
|
"loss": 5.6871, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.56580255493299e-05, |
|
"loss": 5.6842, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 5.773156642913818, |
|
"eval_runtime": 4.3306, |
|
"eval_samples_per_second": 231.146, |
|
"eval_steps_per_second": 14.548, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.562786247568857e-05, |
|
"loss": 5.6853, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.559769940204723e-05, |
|
"loss": 5.7022, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 5.724014759063721, |
|
"eval_runtime": 4.268, |
|
"eval_samples_per_second": 234.536, |
|
"eval_steps_per_second": 14.761, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5567536328405894e-05, |
|
"loss": 5.7029, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.553737325476456e-05, |
|
"loss": 5.707, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 5.71505880355835, |
|
"eval_runtime": 4.2511, |
|
"eval_samples_per_second": 235.471, |
|
"eval_steps_per_second": 14.82, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.550727050727051e-05, |
|
"loss": 5.7014, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.547710743362917e-05, |
|
"loss": 5.7068, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 5.703161239624023, |
|
"eval_runtime": 4.3019, |
|
"eval_samples_per_second": 232.687, |
|
"eval_steps_per_second": 14.645, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5446944359987844e-05, |
|
"loss": 5.6946, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5416781286346505e-05, |
|
"loss": 5.6892, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 5.7333149909973145, |
|
"eval_runtime": 4.2526, |
|
"eval_samples_per_second": 235.387, |
|
"eval_steps_per_second": 14.815, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5386678538852454e-05, |
|
"loss": 5.6814, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535651546521112e-05, |
|
"loss": 5.6979, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 5.712403297424316, |
|
"eval_runtime": 4.3234, |
|
"eval_samples_per_second": 231.532, |
|
"eval_steps_per_second": 14.572, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.532635239156979e-05, |
|
"loss": 5.6959, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.529618931792845e-05, |
|
"loss": 5.6791, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 5.754906177520752, |
|
"eval_runtime": 4.3238, |
|
"eval_samples_per_second": 231.508, |
|
"eval_steps_per_second": 14.57, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5266026244287115e-05, |
|
"loss": 5.6984, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5235923496793065e-05, |
|
"loss": 5.6953, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 5.720048427581787, |
|
"eval_runtime": 4.3352, |
|
"eval_samples_per_second": 230.899, |
|
"eval_steps_per_second": 14.532, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5205760423151725e-05, |
|
"loss": 5.6924, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.51755973495104e-05, |
|
"loss": 5.6845, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 5.744686126708984, |
|
"eval_runtime": 4.3839, |
|
"eval_samples_per_second": 228.335, |
|
"eval_steps_per_second": 14.371, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.514543427586906e-05, |
|
"loss": 5.695, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.511533152837501e-05, |
|
"loss": 5.6872, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 5.723191738128662, |
|
"eval_runtime": 4.2942, |
|
"eval_samples_per_second": 233.107, |
|
"eval_steps_per_second": 14.671, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5085168454733675e-05, |
|
"loss": 5.6841, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.505500538109234e-05, |
|
"loss": 5.687, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 5.738052845001221, |
|
"eval_runtime": 4.282, |
|
"eval_samples_per_second": 233.769, |
|
"eval_steps_per_second": 14.713, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5024842307451e-05, |
|
"loss": 5.6913, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.499467923380967e-05, |
|
"loss": 5.69, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 5.709909915924072, |
|
"eval_runtime": 4.3445, |
|
"eval_samples_per_second": 230.406, |
|
"eval_steps_per_second": 14.501, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.496457648631562e-05, |
|
"loss": 5.6785, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4934413412674286e-05, |
|
"loss": 5.6831, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 5.698389053344727, |
|
"eval_runtime": 6.3376, |
|
"eval_samples_per_second": 157.947, |
|
"eval_steps_per_second": 9.941, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4904250339032946e-05, |
|
"loss": 5.6893, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4874147591538896e-05, |
|
"loss": 5.682, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 5.747517108917236, |
|
"eval_runtime": 4.4117, |
|
"eval_samples_per_second": 226.897, |
|
"eval_steps_per_second": 14.28, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.484398451789756e-05, |
|
"loss": 5.6907, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.481382144425623e-05, |
|
"loss": 5.6771, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 5.721287727355957, |
|
"eval_runtime": 4.3352, |
|
"eval_samples_per_second": 230.902, |
|
"eval_steps_per_second": 14.532, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.47836583706149e-05, |
|
"loss": 5.6743, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.475349529697356e-05, |
|
"loss": 5.6818, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 5.736617088317871, |
|
"eval_runtime": 4.3397, |
|
"eval_samples_per_second": 230.66, |
|
"eval_steps_per_second": 14.517, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4723332223332224e-05, |
|
"loss": 5.6836, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.469316914969089e-05, |
|
"loss": 5.6862, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 5.706295490264893, |
|
"eval_runtime": 4.3399, |
|
"eval_samples_per_second": 230.65, |
|
"eval_steps_per_second": 14.516, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.466300607604956e-05, |
|
"loss": 5.6792, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.46329033285555e-05, |
|
"loss": 5.6706, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 5.7102370262146, |
|
"eval_runtime": 4.2947, |
|
"eval_samples_per_second": 233.076, |
|
"eval_steps_per_second": 14.669, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4602740254914174e-05, |
|
"loss": 5.6775, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4572577181272834e-05, |
|
"loss": 5.6662, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 5.69236421585083, |
|
"eval_runtime": 6.3959, |
|
"eval_samples_per_second": 156.506, |
|
"eval_steps_per_second": 9.85, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.45424141076315e-05, |
|
"loss": 5.6691, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4512371686284734e-05, |
|
"loss": 5.6839, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 5.711584091186523, |
|
"eval_runtime": 4.2918, |
|
"eval_samples_per_second": 233.234, |
|
"eval_steps_per_second": 14.679, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.44822086126434e-05, |
|
"loss": 5.6725, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.445204553900206e-05, |
|
"loss": 5.6789, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 5.663390159606934, |
|
"eval_runtime": 4.3322, |
|
"eval_samples_per_second": 231.063, |
|
"eval_steps_per_second": 14.542, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.442188246536073e-05, |
|
"loss": 5.6765, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4391719391719395e-05, |
|
"loss": 5.6618, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 5.73824405670166, |
|
"eval_runtime": 4.2539, |
|
"eval_samples_per_second": 235.311, |
|
"eval_steps_per_second": 14.81, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4361556318078055e-05, |
|
"loss": 5.6636, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.433139324443673e-05, |
|
"loss": 5.6733, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 5.712129592895508, |
|
"eval_runtime": 4.3264, |
|
"eval_samples_per_second": 231.369, |
|
"eval_steps_per_second": 14.562, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.430123017079539e-05, |
|
"loss": 5.6787, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.427112742330134e-05, |
|
"loss": 5.6709, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 5.736870288848877, |
|
"eval_runtime": 4.381, |
|
"eval_samples_per_second": 228.487, |
|
"eval_steps_per_second": 14.38, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4240964349660005e-05, |
|
"loss": 5.6618, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.421080127601867e-05, |
|
"loss": 5.6627, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 5.6889142990112305, |
|
"eval_runtime": 4.2821, |
|
"eval_samples_per_second": 233.762, |
|
"eval_steps_per_second": 14.712, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.418063820237733e-05, |
|
"loss": 5.6751, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.415053545488328e-05, |
|
"loss": 5.655, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 5.685154438018799, |
|
"eval_runtime": 6.9191, |
|
"eval_samples_per_second": 144.671, |
|
"eval_steps_per_second": 9.105, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.412037238124195e-05, |
|
"loss": 5.6734, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4090209307600616e-05, |
|
"loss": 5.6649, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 5.713215351104736, |
|
"eval_runtime": 4.2937, |
|
"eval_samples_per_second": 233.132, |
|
"eval_steps_per_second": 14.673, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4060046233959276e-05, |
|
"loss": 5.6724, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4029943486465226e-05, |
|
"loss": 5.6634, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 5.674860000610352, |
|
"eval_runtime": 4.3043, |
|
"eval_samples_per_second": 232.557, |
|
"eval_steps_per_second": 14.636, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.399978041282389e-05, |
|
"loss": 5.6674, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.396961733918255e-05, |
|
"loss": 5.6625, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 5.694820880889893, |
|
"eval_runtime": 4.396, |
|
"eval_samples_per_second": 227.709, |
|
"eval_steps_per_second": 14.331, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.393951459168851e-05, |
|
"loss": 5.6711, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.390935151804717e-05, |
|
"loss": 5.6721, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 5.711126804351807, |
|
"eval_runtime": 6.4814, |
|
"eval_samples_per_second": 154.443, |
|
"eval_steps_per_second": 9.72, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3879188444405836e-05, |
|
"loss": 5.6574, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.38490253707645e-05, |
|
"loss": 5.6608, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 5.73061990737915, |
|
"eval_runtime": 4.2517, |
|
"eval_samples_per_second": 235.437, |
|
"eval_steps_per_second": 14.818, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.381886229712317e-05, |
|
"loss": 5.6663, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.378869922348183e-05, |
|
"loss": 5.6685, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 5.769214630126953, |
|
"eval_runtime": 4.3149, |
|
"eval_samples_per_second": 231.988, |
|
"eval_steps_per_second": 14.601, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3758536149840504e-05, |
|
"loss": 5.6631, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3728373076199164e-05, |
|
"loss": 5.6557, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 5.712480545043945, |
|
"eval_runtime": 4.3273, |
|
"eval_samples_per_second": 231.325, |
|
"eval_steps_per_second": 14.559, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3698270328705114e-05, |
|
"loss": 5.6767, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3668167581211063e-05, |
|
"loss": 5.6665, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 5.739169120788574, |
|
"eval_runtime": 4.2992, |
|
"eval_samples_per_second": 232.836, |
|
"eval_steps_per_second": 14.654, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.363800450756973e-05, |
|
"loss": 5.6699, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.360784143392839e-05, |
|
"loss": 5.6603, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 5.6816325187683105, |
|
"eval_runtime": 4.353, |
|
"eval_samples_per_second": 229.954, |
|
"eval_steps_per_second": 14.473, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.357767836028706e-05, |
|
"loss": 5.6717, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3547515286645725e-05, |
|
"loss": 5.6712, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 5.7074737548828125, |
|
"eval_runtime": 4.3743, |
|
"eval_samples_per_second": 228.834, |
|
"eval_steps_per_second": 14.402, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3517352213004385e-05, |
|
"loss": 5.6676, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.348718913936305e-05, |
|
"loss": 5.6547, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 5.6991777420043945, |
|
"eval_runtime": 4.4989, |
|
"eval_samples_per_second": 222.5, |
|
"eval_steps_per_second": 14.004, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3457086391869e-05, |
|
"loss": 5.6638, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.342692331822767e-05, |
|
"loss": 5.6511, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 5.679354190826416, |
|
"eval_runtime": 4.2833, |
|
"eval_samples_per_second": 233.696, |
|
"eval_steps_per_second": 14.708, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3396760244586335e-05, |
|
"loss": 5.6673, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3366597170945e-05, |
|
"loss": 5.6663, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 5.680385589599609, |
|
"eval_runtime": 4.3563, |
|
"eval_samples_per_second": 229.783, |
|
"eval_steps_per_second": 14.462, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.333643409730366e-05, |
|
"loss": 5.6682, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.330627102366233e-05, |
|
"loss": 5.6562, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 5.730945587158203, |
|
"eval_runtime": 6.374, |
|
"eval_samples_per_second": 157.044, |
|
"eval_steps_per_second": 9.884, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3276107950020996e-05, |
|
"loss": 5.6656, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3246005202526946e-05, |
|
"loss": 5.6546, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 5.708312034606934, |
|
"eval_runtime": 4.2353, |
|
"eval_samples_per_second": 236.345, |
|
"eval_steps_per_second": 14.875, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3215842128885606e-05, |
|
"loss": 5.6529, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.318567905524428e-05, |
|
"loss": 5.656, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 5.711429119110107, |
|
"eval_runtime": 4.3481, |
|
"eval_samples_per_second": 230.216, |
|
"eval_steps_per_second": 14.489, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.315551598160294e-05, |
|
"loss": 5.6731, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.312535290796161e-05, |
|
"loss": 5.6712, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 5.6710896492004395, |
|
"eval_runtime": 4.2998, |
|
"eval_samples_per_second": 232.804, |
|
"eval_steps_per_second": 14.652, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3095189834320274e-05, |
|
"loss": 5.6619, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.306502676067894e-05, |
|
"loss": 5.6473, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 5.6910600662231445, |
|
"eval_runtime": 4.3442, |
|
"eval_samples_per_second": 230.421, |
|
"eval_steps_per_second": 14.502, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3034924013184884e-05, |
|
"loss": 5.6577, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.300476093954355e-05, |
|
"loss": 5.6352, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 5.7348408699035645, |
|
"eval_runtime": 4.3322, |
|
"eval_samples_per_second": 231.062, |
|
"eval_steps_per_second": 14.542, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.297459786590222e-05, |
|
"loss": 5.647, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.294443479226088e-05, |
|
"loss": 5.6602, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 5.7037835121154785, |
|
"eval_runtime": 6.3915, |
|
"eval_samples_per_second": 156.614, |
|
"eval_steps_per_second": 9.857, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2914271718619545e-05, |
|
"loss": 5.6598, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.288410864497821e-05, |
|
"loss": 5.645, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 5.670388698577881, |
|
"eval_runtime": 4.2976, |
|
"eval_samples_per_second": 232.922, |
|
"eval_steps_per_second": 14.659, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.285394557133688e-05, |
|
"loss": 5.637, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.282378249769554e-05, |
|
"loss": 5.6611, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 5.681214332580566, |
|
"eval_runtime": 4.3365, |
|
"eval_samples_per_second": 230.833, |
|
"eval_steps_per_second": 14.528, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2793679750201495e-05, |
|
"loss": 5.6413, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.276357700270744e-05, |
|
"loss": 5.6442, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 5.678561687469482, |
|
"eval_runtime": 4.2838, |
|
"eval_samples_per_second": 233.671, |
|
"eval_steps_per_second": 14.707, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2733413929066105e-05, |
|
"loss": 5.6429, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.270325085542477e-05, |
|
"loss": 5.6572, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 5.70313024520874, |
|
"eval_runtime": 4.2374, |
|
"eval_samples_per_second": 236.231, |
|
"eval_steps_per_second": 14.868, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.267308778178344e-05, |
|
"loss": 5.6563, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.26429247081421e-05, |
|
"loss": 5.6631, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 5.692766189575195, |
|
"eval_runtime": 4.2623, |
|
"eval_samples_per_second": 234.85, |
|
"eval_steps_per_second": 14.781, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.261276163450077e-05, |
|
"loss": 5.6386, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.258259856085943e-05, |
|
"loss": 5.6553, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 5.708261013031006, |
|
"eval_runtime": 4.2737, |
|
"eval_samples_per_second": 234.224, |
|
"eval_steps_per_second": 14.741, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2552495813365376e-05, |
|
"loss": 5.6524, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.252233273972405e-05, |
|
"loss": 5.6521, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 5.687132358551025, |
|
"eval_runtime": 4.4286, |
|
"eval_samples_per_second": 226.029, |
|
"eval_steps_per_second": 14.226, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.249216966608271e-05, |
|
"loss": 5.6559, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2462006592441377e-05, |
|
"loss": 5.6644, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 5.712847709655762, |
|
"eval_runtime": 4.3174, |
|
"eval_samples_per_second": 231.852, |
|
"eval_steps_per_second": 14.592, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2431843518800044e-05, |
|
"loss": 5.6407, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.240174077130599e-05, |
|
"loss": 5.6421, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 5.716492652893066, |
|
"eval_runtime": 4.2976, |
|
"eval_samples_per_second": 232.92, |
|
"eval_steps_per_second": 14.659, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.237157769766465e-05, |
|
"loss": 5.6653, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.234141462402332e-05, |
|
"loss": 5.6465, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 5.711600303649902, |
|
"eval_runtime": 4.3593, |
|
"eval_samples_per_second": 229.622, |
|
"eval_steps_per_second": 14.452, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.231125155038199e-05, |
|
"loss": 5.6545, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2281088476740654e-05, |
|
"loss": 5.653, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 5.694251537322998, |
|
"eval_runtime": 6.4453, |
|
"eval_samples_per_second": 155.307, |
|
"eval_steps_per_second": 9.775, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2250925403099314e-05, |
|
"loss": 5.6472, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.222076232945799e-05, |
|
"loss": 5.6546, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 5.6814446449279785, |
|
"eval_runtime": 4.3371, |
|
"eval_samples_per_second": 230.802, |
|
"eval_steps_per_second": 14.526, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.219059925581665e-05, |
|
"loss": 5.6483, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.216055683446988e-05, |
|
"loss": 5.654, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 5.7155609130859375, |
|
"eval_runtime": 4.3569, |
|
"eval_samples_per_second": 229.748, |
|
"eval_steps_per_second": 14.46, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.213039376082855e-05, |
|
"loss": 5.6427, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.210023068718721e-05, |
|
"loss": 5.6526, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 5.657435417175293, |
|
"eval_runtime": 4.288, |
|
"eval_samples_per_second": 233.441, |
|
"eval_steps_per_second": 14.692, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2070067613545875e-05, |
|
"loss": 5.6495, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.203990453990454e-05, |
|
"loss": 5.649, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 5.6852569580078125, |
|
"eval_runtime": 4.3096, |
|
"eval_samples_per_second": 232.275, |
|
"eval_steps_per_second": 14.619, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.200980179241049e-05, |
|
"loss": 5.6591, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.197963871876915e-05, |
|
"loss": 5.6427, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 5.6808366775512695, |
|
"eval_runtime": 4.2828, |
|
"eval_samples_per_second": 233.726, |
|
"eval_steps_per_second": 14.71, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1949475645127825e-05, |
|
"loss": 5.6409, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1919312571486485e-05, |
|
"loss": 5.6436, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 5.664061546325684, |
|
"eval_runtime": 4.2839, |
|
"eval_samples_per_second": 233.663, |
|
"eval_steps_per_second": 14.706, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.188914949784515e-05, |
|
"loss": 5.6421, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.185898642420382e-05, |
|
"loss": 5.6319, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 5.68691349029541, |
|
"eval_runtime": 4.3423, |
|
"eval_samples_per_second": 230.522, |
|
"eval_steps_per_second": 14.508, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1828823350562486e-05, |
|
"loss": 5.6542, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1798660276921146e-05, |
|
"loss": 5.6393, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 5.684676647186279, |
|
"eval_runtime": 4.3763, |
|
"eval_samples_per_second": 228.73, |
|
"eval_steps_per_second": 14.396, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.176849720327981e-05, |
|
"loss": 5.6442, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.173839445578576e-05, |
|
"loss": 5.6363, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 5.680596828460693, |
|
"eval_runtime": 4.349, |
|
"eval_samples_per_second": 230.166, |
|
"eval_steps_per_second": 14.486, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.170823138214443e-05, |
|
"loss": 5.6416, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.167812863465038e-05, |
|
"loss": 5.648, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 5.648463249206543, |
|
"eval_runtime": 4.3014, |
|
"eval_samples_per_second": 232.713, |
|
"eval_steps_per_second": 14.646, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.164796556100904e-05, |
|
"loss": 5.6414, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1617802487367706e-05, |
|
"loss": 5.6297, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 5.679803371429443, |
|
"eval_runtime": 4.2827, |
|
"eval_samples_per_second": 233.729, |
|
"eval_steps_per_second": 14.71, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1587639413726373e-05, |
|
"loss": 5.6411, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.155747634008504e-05, |
|
"loss": 5.6442, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 5.673394203186035, |
|
"eval_runtime": 4.3412, |
|
"eval_samples_per_second": 230.582, |
|
"eval_steps_per_second": 14.512, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.15273132664437e-05, |
|
"loss": 5.6313, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.149715019280237e-05, |
|
"loss": 5.6546, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 5.6783528327941895, |
|
"eval_runtime": 4.4144, |
|
"eval_samples_per_second": 226.756, |
|
"eval_steps_per_second": 14.271, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.146704744530832e-05, |
|
"loss": 5.6374, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1436884371666984e-05, |
|
"loss": 5.6325, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 5.645352840423584, |
|
"eval_runtime": 4.3033, |
|
"eval_samples_per_second": 232.612, |
|
"eval_steps_per_second": 14.64, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1406721298025644e-05, |
|
"loss": 5.643, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.137655822438432e-05, |
|
"loss": 5.6386, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 5.695677757263184, |
|
"eval_runtime": 4.2949, |
|
"eval_samples_per_second": 233.065, |
|
"eval_steps_per_second": 14.668, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.134639515074298e-05, |
|
"loss": 5.6337, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.131629240324893e-05, |
|
"loss": 5.6236, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 5.678653240203857, |
|
"eval_runtime": 4.4357, |
|
"eval_samples_per_second": 225.669, |
|
"eval_steps_per_second": 14.203, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1286129329607595e-05, |
|
"loss": 5.639, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1255966255966255e-05, |
|
"loss": 5.6269, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 5.681842803955078, |
|
"eval_runtime": 4.3725, |
|
"eval_samples_per_second": 228.929, |
|
"eval_steps_per_second": 14.408, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.122580318232492e-05, |
|
"loss": 5.635, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.119564010868359e-05, |
|
"loss": 5.6285, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 5.673871040344238, |
|
"eval_runtime": 4.3771, |
|
"eval_samples_per_second": 228.689, |
|
"eval_steps_per_second": 14.393, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1165477035042256e-05, |
|
"loss": 5.6383, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.11353742875482e-05, |
|
"loss": 5.65, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 5.630629539489746, |
|
"eval_runtime": 4.41, |
|
"eval_samples_per_second": 226.983, |
|
"eval_steps_per_second": 14.286, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.110521121390687e-05, |
|
"loss": 5.6381, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.107504814026553e-05, |
|
"loss": 5.6313, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 5.6462812423706055, |
|
"eval_runtime": 4.3445, |
|
"eval_samples_per_second": 230.407, |
|
"eval_steps_per_second": 14.501, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.10448850666242e-05, |
|
"loss": 5.6347, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1014721992982866e-05, |
|
"loss": 5.6412, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 5.668566703796387, |
|
"eval_runtime": 4.3418, |
|
"eval_samples_per_second": 230.551, |
|
"eval_steps_per_second": 14.51, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.098455891934153e-05, |
|
"loss": 5.639, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0954456171847476e-05, |
|
"loss": 5.6278, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 5.6881489753723145, |
|
"eval_runtime": 4.455, |
|
"eval_samples_per_second": 224.692, |
|
"eval_steps_per_second": 14.141, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.092429309820614e-05, |
|
"loss": 5.6408, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.089413002456481e-05, |
|
"loss": 5.637, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 5.688764572143555, |
|
"eval_runtime": 6.2867, |
|
"eval_samples_per_second": 159.224, |
|
"eval_steps_per_second": 10.021, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.086396695092348e-05, |
|
"loss": 5.646, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.083380387728214e-05, |
|
"loss": 5.626, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 5.671611785888672, |
|
"eval_runtime": 4.3616, |
|
"eval_samples_per_second": 229.503, |
|
"eval_steps_per_second": 14.444, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.080364080364081e-05, |
|
"loss": 5.6378, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0773538056146754e-05, |
|
"loss": 5.6338, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 5.616012096405029, |
|
"eval_runtime": 4.4085, |
|
"eval_samples_per_second": 227.06, |
|
"eval_steps_per_second": 14.29, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0743374982505414e-05, |
|
"loss": 5.6318, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.071321190886409e-05, |
|
"loss": 5.6361, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 5.667599201202393, |
|
"eval_runtime": 6.2583, |
|
"eval_samples_per_second": 159.947, |
|
"eval_steps_per_second": 10.067, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.068304883522275e-05, |
|
"loss": 5.625, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0652885761581415e-05, |
|
"loss": 5.6336, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 5.684675216674805, |
|
"eval_runtime": 4.3154, |
|
"eval_samples_per_second": 231.962, |
|
"eval_steps_per_second": 14.599, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.062272268794008e-05, |
|
"loss": 5.6388, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.059255961429875e-05, |
|
"loss": 5.6351, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 5.675138473510742, |
|
"eval_runtime": 4.3055, |
|
"eval_samples_per_second": 232.494, |
|
"eval_steps_per_second": 14.632, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.056239654065741e-05, |
|
"loss": 5.6266, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.053229379316336e-05, |
|
"loss": 5.6408, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 5.615994453430176, |
|
"eval_runtime": 4.3074, |
|
"eval_samples_per_second": 232.39, |
|
"eval_steps_per_second": 14.626, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0502130719522025e-05, |
|
"loss": 5.6399, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.047196764588069e-05, |
|
"loss": 5.6232, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 5.652904033660889, |
|
"eval_runtime": 4.3343, |
|
"eval_samples_per_second": 230.949, |
|
"eval_steps_per_second": 14.535, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.044186489838664e-05, |
|
"loss": 5.6249, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.041170182474531e-05, |
|
"loss": 5.6319, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 5.64531946182251, |
|
"eval_runtime": 5.6252, |
|
"eval_samples_per_second": 177.948, |
|
"eval_steps_per_second": 11.2, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.038153875110397e-05, |
|
"loss": 5.6244, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0351375677462636e-05, |
|
"loss": 5.6204, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 5.661260604858398, |
|
"eval_runtime": 4.3573, |
|
"eval_samples_per_second": 229.732, |
|
"eval_steps_per_second": 14.459, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.03212126038213e-05, |
|
"loss": 5.6353, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0291109856327246e-05, |
|
"loss": 5.6231, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 5.666103363037109, |
|
"eval_runtime": 4.5226, |
|
"eval_samples_per_second": 221.333, |
|
"eval_steps_per_second": 13.93, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.026094678268591e-05, |
|
"loss": 5.6318, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.023078370904458e-05, |
|
"loss": 5.6322, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 5.640702247619629, |
|
"eval_runtime": 4.3357, |
|
"eval_samples_per_second": 230.872, |
|
"eval_steps_per_second": 14.53, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.020062063540325e-05, |
|
"loss": 5.6135, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.017045756176191e-05, |
|
"loss": 5.6369, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 5.658245086669922, |
|
"eval_runtime": 4.353, |
|
"eval_samples_per_second": 229.957, |
|
"eval_steps_per_second": 14.473, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.014029448812058e-05, |
|
"loss": 5.6445, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0110191740626523e-05, |
|
"loss": 5.6337, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 5.672937393188477, |
|
"eval_runtime": 4.4799, |
|
"eval_samples_per_second": 223.441, |
|
"eval_steps_per_second": 14.063, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.008002866698519e-05, |
|
"loss": 5.6349, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.004986559334386e-05, |
|
"loss": 5.6278, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 5.6635026931762695, |
|
"eval_runtime": 6.1168, |
|
"eval_samples_per_second": 163.647, |
|
"eval_steps_per_second": 10.299, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0019702519702524e-05, |
|
"loss": 5.6318, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9989539446061184e-05, |
|
"loss": 5.6371, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 5.643805503845215, |
|
"eval_runtime": 4.2716, |
|
"eval_samples_per_second": 234.336, |
|
"eval_steps_per_second": 14.748, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.995943669856714e-05, |
|
"loss": 5.6145, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.99292736249258e-05, |
|
"loss": 5.624, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 5.633095741271973, |
|
"eval_runtime": 4.3138, |
|
"eval_samples_per_second": 232.048, |
|
"eval_steps_per_second": 14.604, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.989911055128447e-05, |
|
"loss": 5.619, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9868947477643135e-05, |
|
"loss": 5.6198, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 5.6540656089782715, |
|
"eval_runtime": 4.3595, |
|
"eval_samples_per_second": 229.615, |
|
"eval_steps_per_second": 14.451, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.98387844040018e-05, |
|
"loss": 5.624, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9808681656507745e-05, |
|
"loss": 5.6243, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 5.651320457458496, |
|
"eval_runtime": 4.3133, |
|
"eval_samples_per_second": 232.072, |
|
"eval_steps_per_second": 14.606, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.977851858286641e-05, |
|
"loss": 5.6133, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.974835550922508e-05, |
|
"loss": 5.6118, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 5.682806491851807, |
|
"eval_runtime": 4.4555, |
|
"eval_samples_per_second": 224.668, |
|
"eval_steps_per_second": 14.14, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.971819243558374e-05, |
|
"loss": 5.6085, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9688029361942406e-05, |
|
"loss": 5.6315, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 5.685708045959473, |
|
"eval_runtime": 4.3306, |
|
"eval_samples_per_second": 231.143, |
|
"eval_steps_per_second": 14.547, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.965786628830107e-05, |
|
"loss": 5.6271, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.962770321465974e-05, |
|
"loss": 5.6208, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 5.64992618560791, |
|
"eval_runtime": 4.409, |
|
"eval_samples_per_second": 227.033, |
|
"eval_steps_per_second": 14.289, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.95975401410184e-05, |
|
"loss": 5.6218, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9567437393524356e-05, |
|
"loss": 5.6261, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 5.64516544342041, |
|
"eval_runtime": 4.4391, |
|
"eval_samples_per_second": 225.497, |
|
"eval_steps_per_second": 14.192, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9537274319883016e-05, |
|
"loss": 5.6255, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.950711124624168e-05, |
|
"loss": 5.6247, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 5.677004814147949, |
|
"eval_runtime": 4.3652, |
|
"eval_samples_per_second": 229.316, |
|
"eval_steps_per_second": 14.432, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.947700849874763e-05, |
|
"loss": 5.6099, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.94468454251063e-05, |
|
"loss": 5.6204, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 5.616661548614502, |
|
"eval_runtime": 4.3551, |
|
"eval_samples_per_second": 229.845, |
|
"eval_steps_per_second": 14.466, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.941668235146496e-05, |
|
"loss": 5.6196, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.938651927782363e-05, |
|
"loss": 5.6166, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 5.683280944824219, |
|
"eval_runtime": 4.3154, |
|
"eval_samples_per_second": 231.959, |
|
"eval_steps_per_second": 14.599, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9356356204182294e-05, |
|
"loss": 5.6303, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.932625345668824e-05, |
|
"loss": 5.6145, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 5.708868980407715, |
|
"eval_runtime": 4.3953, |
|
"eval_samples_per_second": 227.744, |
|
"eval_steps_per_second": 14.334, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.929609038304691e-05, |
|
"loss": 5.6208, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.926592730940557e-05, |
|
"loss": 5.6155, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 5.643332481384277, |
|
"eval_runtime": 4.3751, |
|
"eval_samples_per_second": 228.794, |
|
"eval_steps_per_second": 14.4, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.923576423576424e-05, |
|
"loss": 5.6361, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9205601162122905e-05, |
|
"loss": 5.6162, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 5.648618221282959, |
|
"eval_runtime": 4.3904, |
|
"eval_samples_per_second": 227.999, |
|
"eval_steps_per_second": 14.35, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.917543808848157e-05, |
|
"loss": 5.6344, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.914527501484023e-05, |
|
"loss": 5.6144, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 5.633224964141846, |
|
"eval_runtime": 4.3929, |
|
"eval_samples_per_second": 227.865, |
|
"eval_steps_per_second": 14.341, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.911517226734618e-05, |
|
"loss": 5.6285, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.908500919370485e-05, |
|
"loss": 5.6198, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 5.645481109619141, |
|
"eval_runtime": 4.3967, |
|
"eval_samples_per_second": 227.672, |
|
"eval_steps_per_second": 14.329, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9054846120063515e-05, |
|
"loss": 5.6095, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9024683046422175e-05, |
|
"loss": 5.6231, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 5.6638264656066895, |
|
"eval_runtime": 4.4238, |
|
"eval_samples_per_second": 226.274, |
|
"eval_steps_per_second": 14.241, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.899451997278085e-05, |
|
"loss": 5.6158, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.896441722528679e-05, |
|
"loss": 5.61, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 5.614964962005615, |
|
"eval_runtime": 4.3378, |
|
"eval_samples_per_second": 230.76, |
|
"eval_steps_per_second": 14.523, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.893425415164545e-05, |
|
"loss": 5.5975, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8904091078004126e-05, |
|
"loss": 5.614, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 5.670050621032715, |
|
"eval_runtime": 4.3122, |
|
"eval_samples_per_second": 232.134, |
|
"eval_steps_per_second": 14.61, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8873928004362786e-05, |
|
"loss": 5.6029, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8843825256868736e-05, |
|
"loss": 5.6158, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 5.641917705535889, |
|
"eval_runtime": 4.3145, |
|
"eval_samples_per_second": 232.008, |
|
"eval_steps_per_second": 14.602, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.88136621832274e-05, |
|
"loss": 5.6351, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.878349910958607e-05, |
|
"loss": 5.6163, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 5.651737213134766, |
|
"eval_runtime": 4.3373, |
|
"eval_samples_per_second": 230.79, |
|
"eval_steps_per_second": 14.525, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.875333603594473e-05, |
|
"loss": 5.5979, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8723172962303403e-05, |
|
"loss": 5.6151, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 5.646933078765869, |
|
"eval_runtime": 4.2964, |
|
"eval_samples_per_second": 232.987, |
|
"eval_steps_per_second": 14.664, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8693070214809346e-05, |
|
"loss": 5.6236, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.866290714116801e-05, |
|
"loss": 5.6251, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 5.671980381011963, |
|
"eval_runtime": 4.4739, |
|
"eval_samples_per_second": 223.743, |
|
"eval_steps_per_second": 14.082, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.863274406752668e-05, |
|
"loss": 5.6018, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.860258099388535e-05, |
|
"loss": 5.6272, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 5.603824138641357, |
|
"eval_runtime": 4.3496, |
|
"eval_samples_per_second": 230.135, |
|
"eval_steps_per_second": 14.484, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.857247824639129e-05, |
|
"loss": 5.6282, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.854231517274996e-05, |
|
"loss": 5.6291, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 5.6158552169799805, |
|
"eval_runtime": 6.4339, |
|
"eval_samples_per_second": 155.582, |
|
"eval_steps_per_second": 9.792, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8512152099108624e-05, |
|
"loss": 5.6017, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8481989025467284e-05, |
|
"loss": 5.6114, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 5.6429595947265625, |
|
"eval_runtime": 4.3858, |
|
"eval_samples_per_second": 228.235, |
|
"eval_steps_per_second": 14.364, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.845182595182595e-05, |
|
"loss": 5.6194, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.842166287818462e-05, |
|
"loss": 5.6128, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 5.634527683258057, |
|
"eval_runtime": 4.3652, |
|
"eval_samples_per_second": 229.313, |
|
"eval_steps_per_second": 14.432, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8391499804543285e-05, |
|
"loss": 5.62, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.836139705704923e-05, |
|
"loss": 5.6213, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 5.641001224517822, |
|
"eval_runtime": 4.355, |
|
"eval_samples_per_second": 229.851, |
|
"eval_steps_per_second": 14.466, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.83312339834079e-05, |
|
"loss": 5.6182, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.830107090976656e-05, |
|
"loss": 5.6104, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 5.6409525871276855, |
|
"eval_runtime": 4.3644, |
|
"eval_samples_per_second": 229.353, |
|
"eval_steps_per_second": 14.435, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.827090783612523e-05, |
|
"loss": 5.6061, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8240744762483896e-05, |
|
"loss": 5.6081, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 5.624050617218018, |
|
"eval_runtime": 4.463, |
|
"eval_samples_per_second": 224.291, |
|
"eval_steps_per_second": 14.116, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.821058168884256e-05, |
|
"loss": 5.6234, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8180478941348505e-05, |
|
"loss": 5.6288, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 5.689996242523193, |
|
"eval_runtime": 6.3147, |
|
"eval_samples_per_second": 158.518, |
|
"eval_steps_per_second": 9.977, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.815031586770718e-05, |
|
"loss": 5.6253, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.812015279406584e-05, |
|
"loss": 5.607, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 5.656425952911377, |
|
"eval_runtime": 4.3871, |
|
"eval_samples_per_second": 228.167, |
|
"eval_steps_per_second": 14.36, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8089989720424506e-05, |
|
"loss": 5.6259, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.805982664678317e-05, |
|
"loss": 5.605, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 5.651924133300781, |
|
"eval_runtime": 4.362, |
|
"eval_samples_per_second": 229.484, |
|
"eval_steps_per_second": 14.443, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.8029723899289116e-05, |
|
"loss": 5.6153, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.799956082564778e-05, |
|
"loss": 5.6109, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 5.639188766479492, |
|
"eval_runtime": 4.301, |
|
"eval_samples_per_second": 232.739, |
|
"eval_steps_per_second": 14.648, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.796939775200645e-05, |
|
"loss": 5.6064, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.793923467836512e-05, |
|
"loss": 5.619, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 5.662364959716797, |
|
"eval_runtime": 4.3023, |
|
"eval_samples_per_second": 232.664, |
|
"eval_steps_per_second": 14.643, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.790913193087106e-05, |
|
"loss": 5.6079, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7878968857229727e-05, |
|
"loss": 5.6019, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 5.653491973876953, |
|
"eval_runtime": 4.2771, |
|
"eval_samples_per_second": 234.036, |
|
"eval_steps_per_second": 14.73, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7848805783588394e-05, |
|
"loss": 5.6022, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.781864270994706e-05, |
|
"loss": 5.6133, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 5.676525115966797, |
|
"eval_runtime": 4.3425, |
|
"eval_samples_per_second": 230.512, |
|
"eval_steps_per_second": 14.508, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.778847963630572e-05, |
|
"loss": 5.6117, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.775837688881168e-05, |
|
"loss": 5.5927, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 5.636429786682129, |
|
"eval_runtime": 4.3367, |
|
"eval_samples_per_second": 230.822, |
|
"eval_steps_per_second": 14.527, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.772821381517034e-05, |
|
"loss": 5.6198, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7698050741529004e-05, |
|
"loss": 5.6119, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 5.666501045227051, |
|
"eval_runtime": 4.3287, |
|
"eval_samples_per_second": 231.246, |
|
"eval_steps_per_second": 14.554, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.766788766788767e-05, |
|
"loss": 5.6199, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.763778492039362e-05, |
|
"loss": 5.602, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 5.631441116333008, |
|
"eval_runtime": 4.3578, |
|
"eval_samples_per_second": 229.703, |
|
"eval_steps_per_second": 14.457, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.760762184675228e-05, |
|
"loss": 5.5997, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.757745877311095e-05, |
|
"loss": 5.605, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 5.630640983581543, |
|
"eval_runtime": 4.2859, |
|
"eval_samples_per_second": 233.558, |
|
"eval_steps_per_second": 14.699, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7547295699469615e-05, |
|
"loss": 5.6155, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7517132625828275e-05, |
|
"loss": 5.612, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 5.597905158996582, |
|
"eval_runtime": 4.3535, |
|
"eval_samples_per_second": 229.93, |
|
"eval_steps_per_second": 14.471, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.748696955218695e-05, |
|
"loss": 5.6189, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.745680647854561e-05, |
|
"loss": 5.6184, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 5.636325359344482, |
|
"eval_runtime": 4.3413, |
|
"eval_samples_per_second": 230.575, |
|
"eval_steps_per_second": 14.512, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.742670373105156e-05, |
|
"loss": 5.6078, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7396540657410225e-05, |
|
"loss": 5.6131, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 5.635697364807129, |
|
"eval_runtime": 4.3624, |
|
"eval_samples_per_second": 229.461, |
|
"eval_steps_per_second": 14.442, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.736637758376889e-05, |
|
"loss": 5.6102, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.733621451012755e-05, |
|
"loss": 5.6063, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 5.6277570724487305, |
|
"eval_runtime": 4.4064, |
|
"eval_samples_per_second": 227.169, |
|
"eval_steps_per_second": 14.297, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.730605143648622e-05, |
|
"loss": 5.6061, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.727594868899217e-05, |
|
"loss": 5.5978, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 5.661520004272461, |
|
"eval_runtime": 4.3347, |
|
"eval_samples_per_second": 230.925, |
|
"eval_steps_per_second": 14.534, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7245785615350836e-05, |
|
"loss": 5.6161, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7215622541709496e-05, |
|
"loss": 5.6034, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 5.591000080108643, |
|
"eval_runtime": 4.3807, |
|
"eval_samples_per_second": 228.503, |
|
"eval_steps_per_second": 14.381, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.718545946806817e-05, |
|
"loss": 5.6148, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.715535672057411e-05, |
|
"loss": 5.6112, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 5.652878284454346, |
|
"eval_runtime": 6.5244, |
|
"eval_samples_per_second": 153.423, |
|
"eval_steps_per_second": 9.656, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.712519364693278e-05, |
|
"loss": 5.6086, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.709503057329145e-05, |
|
"loss": 5.5992, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 5.627796173095703, |
|
"eval_runtime": 4.3064, |
|
"eval_samples_per_second": 232.446, |
|
"eval_steps_per_second": 14.629, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.706486749965011e-05, |
|
"loss": 5.6003, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7034764752156056e-05, |
|
"loss": 5.6099, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 5.642346382141113, |
|
"eval_runtime": 4.4977, |
|
"eval_samples_per_second": 222.559, |
|
"eval_steps_per_second": 14.007, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.700460167851472e-05, |
|
"loss": 5.5956, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.697443860487339e-05, |
|
"loss": 5.6031, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 5.631239891052246, |
|
"eval_runtime": 5.8594, |
|
"eval_samples_per_second": 170.837, |
|
"eval_steps_per_second": 10.752, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.694427553123205e-05, |
|
"loss": 5.604, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6914112457590724e-05, |
|
"loss": 5.6015, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 5.639444351196289, |
|
"eval_runtime": 4.3658, |
|
"eval_samples_per_second": 229.284, |
|
"eval_steps_per_second": 14.43, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.688400971009667e-05, |
|
"loss": 5.6062, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6853846636455334e-05, |
|
"loss": 5.6041, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 5.653528690338135, |
|
"eval_runtime": 4.4108, |
|
"eval_samples_per_second": 226.943, |
|
"eval_steps_per_second": 14.283, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6823683562814e-05, |
|
"loss": 5.6039, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.679352048917267e-05, |
|
"loss": 5.6027, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 5.654060363769531, |
|
"eval_runtime": 4.3047, |
|
"eval_samples_per_second": 232.538, |
|
"eval_steps_per_second": 14.635, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.676341774167861e-05, |
|
"loss": 5.5953, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.673325466803728e-05, |
|
"loss": 5.6065, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 5.660271167755127, |
|
"eval_runtime": 4.4687, |
|
"eval_samples_per_second": 224.002, |
|
"eval_steps_per_second": 14.098, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6703091594395945e-05, |
|
"loss": 5.6137, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6672928520754605e-05, |
|
"loss": 5.601, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 5.656097888946533, |
|
"eval_runtime": 4.2208, |
|
"eval_samples_per_second": 237.157, |
|
"eval_steps_per_second": 14.926, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.664282577326056e-05, |
|
"loss": 5.6, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.661266269961922e-05, |
|
"loss": 5.6075, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 5.628451824188232, |
|
"eval_runtime": 4.3448, |
|
"eval_samples_per_second": 230.39, |
|
"eval_steps_per_second": 14.5, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.658249962597789e-05, |
|
"loss": 5.5945, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6552336552336555e-05, |
|
"loss": 5.608, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 5.617640018463135, |
|
"eval_runtime": 4.3432, |
|
"eval_samples_per_second": 230.475, |
|
"eval_steps_per_second": 14.505, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.652217347869522e-05, |
|
"loss": 5.5962, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6492070731201165e-05, |
|
"loss": 5.5969, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 5.676702976226807, |
|
"eval_runtime": 4.3353, |
|
"eval_samples_per_second": 230.893, |
|
"eval_steps_per_second": 14.532, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.646190765755983e-05, |
|
"loss": 5.6165, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.64317445839185e-05, |
|
"loss": 5.6145, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 5.627898693084717, |
|
"eval_runtime": 4.3201, |
|
"eval_samples_per_second": 231.706, |
|
"eval_steps_per_second": 14.583, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6401581510277166e-05, |
|
"loss": 5.6048, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6371418436635826e-05, |
|
"loss": 5.6053, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 5.602691650390625, |
|
"eval_runtime": 6.0129, |
|
"eval_samples_per_second": 166.476, |
|
"eval_steps_per_second": 10.478, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.63412553629945e-05, |
|
"loss": 5.6042, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.631109228935316e-05, |
|
"loss": 5.5962, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 5.625843048095703, |
|
"eval_runtime": 4.3278, |
|
"eval_samples_per_second": 231.293, |
|
"eval_steps_per_second": 14.557, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.62809895418591e-05, |
|
"loss": 5.6113, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6250826468217777e-05, |
|
"loss": 5.6022, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 5.665659427642822, |
|
"eval_runtime": 4.359, |
|
"eval_samples_per_second": 229.642, |
|
"eval_steps_per_second": 14.453, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.622066339457644e-05, |
|
"loss": 5.6051, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6190500320935104e-05, |
|
"loss": 5.6086, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 5.674694538116455, |
|
"eval_runtime": 4.2969, |
|
"eval_samples_per_second": 232.959, |
|
"eval_steps_per_second": 14.662, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.616033724729377e-05, |
|
"loss": 5.5967, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.613023449979972e-05, |
|
"loss": 5.6055, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 5.6704535484313965, |
|
"eval_runtime": 4.3875, |
|
"eval_samples_per_second": 228.15, |
|
"eval_steps_per_second": 14.359, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.610007142615838e-05, |
|
"loss": 5.5994, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6069908352517054e-05, |
|
"loss": 5.6073, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 5.617954730987549, |
|
"eval_runtime": 4.3854, |
|
"eval_samples_per_second": 228.258, |
|
"eval_steps_per_second": 14.366, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6039745278875714e-05, |
|
"loss": 5.5921, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.600958220523438e-05, |
|
"loss": 5.6037, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 5.635035991668701, |
|
"eval_runtime": 4.3159, |
|
"eval_samples_per_second": 231.935, |
|
"eval_steps_per_second": 14.597, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.597941913159305e-05, |
|
"loss": 5.6059, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5949316384099e-05, |
|
"loss": 5.6097, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 5.596395015716553, |
|
"eval_runtime": 4.4067, |
|
"eval_samples_per_second": 227.157, |
|
"eval_steps_per_second": 14.297, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.591915331045766e-05, |
|
"loss": 5.5941, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5888990236816325e-05, |
|
"loss": 5.5931, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 5.672845840454102, |
|
"eval_runtime": 4.3691, |
|
"eval_samples_per_second": 229.106, |
|
"eval_steps_per_second": 14.419, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.585882716317499e-05, |
|
"loss": 5.6034, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.582866408953366e-05, |
|
"loss": 5.6017, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 5.563544273376465, |
|
"eval_runtime": 4.2928, |
|
"eval_samples_per_second": 233.179, |
|
"eval_steps_per_second": 14.676, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.57985613420396e-05, |
|
"loss": 5.6184, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.576839826839827e-05, |
|
"loss": 5.5987, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 5.619165420532227, |
|
"eval_runtime": 4.4266, |
|
"eval_samples_per_second": 226.133, |
|
"eval_steps_per_second": 14.232, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5738235194756936e-05, |
|
"loss": 5.6069, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5708072121115596e-05, |
|
"loss": 5.6052, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 5.640077114105225, |
|
"eval_runtime": 4.304, |
|
"eval_samples_per_second": 232.574, |
|
"eval_steps_per_second": 14.638, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.567790904747427e-05, |
|
"loss": 5.6022, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.564780629998021e-05, |
|
"loss": 5.5994, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 5.631266117095947, |
|
"eval_runtime": 4.339, |
|
"eval_samples_per_second": 230.698, |
|
"eval_steps_per_second": 14.519, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.561764322633888e-05, |
|
"loss": 5.6028, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5587480152697546e-05, |
|
"loss": 5.6057, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 5.639108180999756, |
|
"eval_runtime": 4.3269, |
|
"eval_samples_per_second": 231.343, |
|
"eval_steps_per_second": 14.56, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.555731707905621e-05, |
|
"loss": 5.6019, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.552715400541487e-05, |
|
"loss": 5.6002, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 5.599489212036133, |
|
"eval_runtime": 4.2746, |
|
"eval_samples_per_second": 234.176, |
|
"eval_steps_per_second": 14.738, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.549705125792083e-05, |
|
"loss": 5.604, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.546688818427949e-05, |
|
"loss": 5.6023, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 5.6258158683776855, |
|
"eval_runtime": 4.3904, |
|
"eval_samples_per_second": 227.997, |
|
"eval_steps_per_second": 14.349, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.543672511063816e-05, |
|
"loss": 5.5952, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5406562036996824e-05, |
|
"loss": 5.5972, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 5.620538711547852, |
|
"eval_runtime": 4.4432, |
|
"eval_samples_per_second": 225.29, |
|
"eval_steps_per_second": 14.179, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.537639896335549e-05, |
|
"loss": 5.5949, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5346296215861434e-05, |
|
"loss": 5.5929, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 5.622025489807129, |
|
"eval_runtime": 6.0726, |
|
"eval_samples_per_second": 164.839, |
|
"eval_steps_per_second": 10.374, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.53161331422201e-05, |
|
"loss": 5.6055, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.528597006857877e-05, |
|
"loss": 5.6057, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 5.636081218719482, |
|
"eval_runtime": 4.3951, |
|
"eval_samples_per_second": 227.752, |
|
"eval_steps_per_second": 14.334, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.525580699493743e-05, |
|
"loss": 5.592, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5225704247443384e-05, |
|
"loss": 5.5927, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 5.613470554351807, |
|
"eval_runtime": 4.2808, |
|
"eval_samples_per_second": 233.836, |
|
"eval_steps_per_second": 14.717, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5195541173802044e-05, |
|
"loss": 5.5995, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.516537810016071e-05, |
|
"loss": 5.5932, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 5.671243667602539, |
|
"eval_runtime": 4.3622, |
|
"eval_samples_per_second": 229.469, |
|
"eval_steps_per_second": 14.442, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.513521502651937e-05, |
|
"loss": 5.5976, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.510511227902533e-05, |
|
"loss": 5.5954, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 5.645402431488037, |
|
"eval_runtime": 4.3336, |
|
"eval_samples_per_second": 230.987, |
|
"eval_steps_per_second": 14.538, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.507494920538399e-05, |
|
"loss": 5.5893, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.5044786131742655e-05, |
|
"loss": 5.6012, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 5.6515679359436035, |
|
"eval_runtime": 4.3246, |
|
"eval_samples_per_second": 231.466, |
|
"eval_steps_per_second": 14.568, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.501462305810132e-05, |
|
"loss": 5.6031, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.498445998445999e-05, |
|
"loss": 5.5963, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 5.553864479064941, |
|
"eval_runtime": 4.4624, |
|
"eval_samples_per_second": 224.32, |
|
"eval_steps_per_second": 14.118, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.495435723696593e-05, |
|
"loss": 5.5976, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.492425448947188e-05, |
|
"loss": 5.5912, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 5.589954853057861, |
|
"eval_runtime": 4.401, |
|
"eval_samples_per_second": 227.45, |
|
"eval_steps_per_second": 14.315, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.489409141583055e-05, |
|
"loss": 5.6036, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.486392834218921e-05, |
|
"loss": 5.5945, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 5.610245227813721, |
|
"eval_runtime": 4.3039, |
|
"eval_samples_per_second": 232.58, |
|
"eval_steps_per_second": 14.638, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.483376526854788e-05, |
|
"loss": 5.6045, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.480360219490654e-05, |
|
"loss": 5.5801, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 5.662520885467529, |
|
"eval_runtime": 4.4164, |
|
"eval_samples_per_second": 226.657, |
|
"eval_steps_per_second": 14.265, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.477343912126521e-05, |
|
"loss": 5.5911, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4743276047623876e-05, |
|
"loss": 5.5971, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 5.606277942657471, |
|
"eval_runtime": 4.2933, |
|
"eval_samples_per_second": 233.156, |
|
"eval_steps_per_second": 14.674, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.471311297398254e-05, |
|
"loss": 5.5897, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4683010226488486e-05, |
|
"loss": 5.5806, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 5.616698741912842, |
|
"eval_runtime": 4.358, |
|
"eval_samples_per_second": 229.693, |
|
"eval_steps_per_second": 14.456, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.465284715284716e-05, |
|
"loss": 5.5886, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.462268407920582e-05, |
|
"loss": 5.6036, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 5.618276119232178, |
|
"eval_runtime": 4.391, |
|
"eval_samples_per_second": 227.965, |
|
"eval_steps_per_second": 14.347, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.459252100556449e-05, |
|
"loss": 5.5938, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4562357931923154e-05, |
|
"loss": 5.6139, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 5.62522554397583, |
|
"eval_runtime": 4.3609, |
|
"eval_samples_per_second": 229.539, |
|
"eval_steps_per_second": 14.447, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.453231551057638e-05, |
|
"loss": 5.5998, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4502152436935046e-05, |
|
"loss": 5.5781, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 5.601036548614502, |
|
"eval_runtime": 4.3881, |
|
"eval_samples_per_second": 228.114, |
|
"eval_steps_per_second": 14.357, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.447198936329371e-05, |
|
"loss": 5.5866, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.444182628965238e-05, |
|
"loss": 5.5866, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 5.622751235961914, |
|
"eval_runtime": 4.361, |
|
"eval_samples_per_second": 229.534, |
|
"eval_steps_per_second": 14.446, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.441166321601104e-05, |
|
"loss": 5.5846, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.438150014236971e-05, |
|
"loss": 5.5912, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 5.602281093597412, |
|
"eval_runtime": 4.3383, |
|
"eval_samples_per_second": 230.736, |
|
"eval_steps_per_second": 14.522, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4351337068728374e-05, |
|
"loss": 5.584, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.432117399508704e-05, |
|
"loss": 5.5935, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 5.619230270385742, |
|
"eval_runtime": 4.4126, |
|
"eval_samples_per_second": 226.848, |
|
"eval_steps_per_second": 14.277, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.429107124759299e-05, |
|
"loss": 5.5939, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.426090817395166e-05, |
|
"loss": 5.5879, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 5.617980480194092, |
|
"eval_runtime": 4.483, |
|
"eval_samples_per_second": 223.286, |
|
"eval_steps_per_second": 14.053, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.423074510031032e-05, |
|
"loss": 5.5785, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4200582026668985e-05, |
|
"loss": 5.5968, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 5.606536865234375, |
|
"eval_runtime": 4.3562, |
|
"eval_samples_per_second": 229.79, |
|
"eval_steps_per_second": 14.462, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.417041895302765e-05, |
|
"loss": 5.5833, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.41403162055336e-05, |
|
"loss": 5.5899, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 5.632786750793457, |
|
"eval_runtime": 4.4039, |
|
"eval_samples_per_second": 227.3, |
|
"eval_steps_per_second": 14.306, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411015313189226e-05, |
|
"loss": 5.6022, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.407999005825093e-05, |
|
"loss": 5.5924, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 5.664926528930664, |
|
"eval_runtime": 4.3562, |
|
"eval_samples_per_second": 229.789, |
|
"eval_steps_per_second": 14.462, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4049826984609595e-05, |
|
"loss": 5.5992, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4019663910968256e-05, |
|
"loss": 5.594, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 5.613445281982422, |
|
"eval_runtime": 4.3529, |
|
"eval_samples_per_second": 229.961, |
|
"eval_steps_per_second": 14.473, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.398956116347421e-05, |
|
"loss": 5.595, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.395939808983287e-05, |
|
"loss": 5.5779, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 5.641766548156738, |
|
"eval_runtime": 4.3867, |
|
"eval_samples_per_second": 228.188, |
|
"eval_steps_per_second": 14.361, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.392923501619154e-05, |
|
"loss": 5.5935, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3899071942550206e-05, |
|
"loss": 5.5857, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 5.578934669494629, |
|
"eval_runtime": 4.3746, |
|
"eval_samples_per_second": 228.819, |
|
"eval_steps_per_second": 14.401, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.386890886890887e-05, |
|
"loss": 5.5911, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.383874579526753e-05, |
|
"loss": 5.5854, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 5.591177463531494, |
|
"eval_runtime": 4.3359, |
|
"eval_samples_per_second": 230.861, |
|
"eval_steps_per_second": 14.53, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.38085827216262e-05, |
|
"loss": 5.5984, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.377847997413215e-05, |
|
"loss": 5.5851, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 5.609918117523193, |
|
"eval_runtime": 6.2577, |
|
"eval_samples_per_second": 159.962, |
|
"eval_steps_per_second": 10.068, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.374831690049082e-05, |
|
"loss": 5.5783, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.371815382684948e-05, |
|
"loss": 5.5891, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 5.6276116371154785, |
|
"eval_runtime": 4.3901, |
|
"eval_samples_per_second": 228.014, |
|
"eval_steps_per_second": 14.351, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.368799075320815e-05, |
|
"loss": 5.5805, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.365788800571409e-05, |
|
"loss": 5.5853, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 5.602240562438965, |
|
"eval_runtime": 4.2864, |
|
"eval_samples_per_second": 233.528, |
|
"eval_steps_per_second": 14.698, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.362772493207276e-05, |
|
"loss": 5.5979, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.359756185843143e-05, |
|
"loss": 5.5948, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 5.630936145782471, |
|
"eval_runtime": 4.2788, |
|
"eval_samples_per_second": 233.942, |
|
"eval_steps_per_second": 14.724, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.356739878479009e-05, |
|
"loss": 5.5974, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.3537235711148754e-05, |
|
"loss": 5.5825, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 5.636594772338867, |
|
"eval_runtime": 4.3052, |
|
"eval_samples_per_second": 232.51, |
|
"eval_steps_per_second": 14.634, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.3507132963654704e-05, |
|
"loss": 5.5782, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.347696989001337e-05, |
|
"loss": 5.5843, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 5.65947151184082, |
|
"eval_runtime": 4.3453, |
|
"eval_samples_per_second": 230.365, |
|
"eval_steps_per_second": 14.498, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.344680681637203e-05, |
|
"loss": 5.5752, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.341670406887799e-05, |
|
"loss": 5.5732, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 5.613409519195557, |
|
"eval_runtime": 4.3105, |
|
"eval_samples_per_second": 232.225, |
|
"eval_steps_per_second": 14.616, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.338654099523665e-05, |
|
"loss": 5.5945, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.3356377921595315e-05, |
|
"loss": 5.5779, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 5.653366565704346, |
|
"eval_runtime": 4.3619, |
|
"eval_samples_per_second": 229.489, |
|
"eval_steps_per_second": 14.443, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.332621484795398e-05, |
|
"loss": 5.592, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.329605177431265e-05, |
|
"loss": 5.5827, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 5.661462783813477, |
|
"eval_runtime": 4.2952, |
|
"eval_samples_per_second": 233.05, |
|
"eval_steps_per_second": 14.668, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.326588870067131e-05, |
|
"loss": 5.5852, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3235725627029976e-05, |
|
"loss": 5.5721, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 5.621175289154053, |
|
"eval_runtime": 4.3045, |
|
"eval_samples_per_second": 232.548, |
|
"eval_steps_per_second": 14.636, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.320556255338864e-05, |
|
"loss": 5.5806, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.317539947974731e-05, |
|
"loss": 5.5851, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 5.6257219314575195, |
|
"eval_runtime": 4.3249, |
|
"eval_samples_per_second": 231.453, |
|
"eval_steps_per_second": 14.567, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.314529673225326e-05, |
|
"loss": 5.5875, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.311513365861192e-05, |
|
"loss": 5.5788, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 5.616583347320557, |
|
"eval_runtime": 4.3483, |
|
"eval_samples_per_second": 230.203, |
|
"eval_steps_per_second": 14.488, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.3084970584970586e-05, |
|
"loss": 5.5866, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.305480751132925e-05, |
|
"loss": 5.5697, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 5.639812469482422, |
|
"eval_runtime": 4.4373, |
|
"eval_samples_per_second": 225.589, |
|
"eval_steps_per_second": 14.198, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.30247047638352e-05, |
|
"loss": 5.5711, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.299454169019386e-05, |
|
"loss": 5.5933, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 5.632999897003174, |
|
"eval_runtime": 4.4375, |
|
"eval_samples_per_second": 225.578, |
|
"eval_steps_per_second": 14.197, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.296437861655253e-05, |
|
"loss": 5.5914, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.29342155429112e-05, |
|
"loss": 5.5818, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 5.581757068634033, |
|
"eval_runtime": 4.3378, |
|
"eval_samples_per_second": 230.763, |
|
"eval_steps_per_second": 14.524, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2904052469269864e-05, |
|
"loss": 5.5744, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2873889395628524e-05, |
|
"loss": 5.577, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 5.626399040222168, |
|
"eval_runtime": 4.3623, |
|
"eval_samples_per_second": 229.464, |
|
"eval_steps_per_second": 14.442, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.284378664813448e-05, |
|
"loss": 5.5862, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.281362357449314e-05, |
|
"loss": 5.57, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 5.625292778015137, |
|
"eval_runtime": 6.6232, |
|
"eval_samples_per_second": 151.136, |
|
"eval_steps_per_second": 9.512, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.278346050085181e-05, |
|
"loss": 5.5883, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2753297427210475e-05, |
|
"loss": 5.583, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 5.616182804107666, |
|
"eval_runtime": 4.3373, |
|
"eval_samples_per_second": 230.789, |
|
"eval_steps_per_second": 14.525, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.272313435356914e-05, |
|
"loss": 5.5543, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.26929712799278e-05, |
|
"loss": 5.5804, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 5.654087066650391, |
|
"eval_runtime": 5.9497, |
|
"eval_samples_per_second": 168.245, |
|
"eval_steps_per_second": 10.589, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.266280820628647e-05, |
|
"loss": 5.5752, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2632645132645136e-05, |
|
"loss": 5.5813, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 5.568077564239502, |
|
"eval_runtime": 4.304, |
|
"eval_samples_per_second": 232.575, |
|
"eval_steps_per_second": 14.638, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2602482059003796e-05, |
|
"loss": 5.5821, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.257231898536246e-05, |
|
"loss": 5.5683, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 5.624704837799072, |
|
"eval_runtime": 4.2307, |
|
"eval_samples_per_second": 236.604, |
|
"eval_steps_per_second": 14.891, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.254221623786841e-05, |
|
"loss": 5.5683, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.251205316422708e-05, |
|
"loss": 5.5807, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 5.592767238616943, |
|
"eval_runtime": 4.2406, |
|
"eval_samples_per_second": 236.051, |
|
"eval_steps_per_second": 14.856, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.248189009058574e-05, |
|
"loss": 5.5905, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.2451787343091696e-05, |
|
"loss": 5.5783, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 5.550808906555176, |
|
"eval_runtime": 6.3243, |
|
"eval_samples_per_second": 158.277, |
|
"eval_steps_per_second": 9.962, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2421624269450356e-05, |
|
"loss": 5.5827, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.239146119580902e-05, |
|
"loss": 5.5793, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 5.641295433044434, |
|
"eval_runtime": 4.2972, |
|
"eval_samples_per_second": 232.941, |
|
"eval_steps_per_second": 14.661, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.236129812216769e-05, |
|
"loss": 5.5852, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.233113504852636e-05, |
|
"loss": 5.5576, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 5.595691680908203, |
|
"eval_runtime": 4.2537, |
|
"eval_samples_per_second": 235.322, |
|
"eval_steps_per_second": 14.81, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.230097197488502e-05, |
|
"loss": 5.5686, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.227080890124369e-05, |
|
"loss": 5.575, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 5.610649108886719, |
|
"eval_runtime": 4.3768, |
|
"eval_samples_per_second": 228.704, |
|
"eval_steps_per_second": 14.394, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.224064582760235e-05, |
|
"loss": 5.5757, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.221048275396102e-05, |
|
"loss": 5.5751, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 5.616249084472656, |
|
"eval_runtime": 4.2765, |
|
"eval_samples_per_second": 234.069, |
|
"eval_steps_per_second": 14.732, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.218038000646697e-05, |
|
"loss": 5.5724, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.215021693282563e-05, |
|
"loss": 5.5678, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 5.58726167678833, |
|
"eval_runtime": 4.3531, |
|
"eval_samples_per_second": 229.953, |
|
"eval_steps_per_second": 14.473, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2120053859184295e-05, |
|
"loss": 5.5696, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.208989078554296e-05, |
|
"loss": 5.5762, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 5.5941548347473145, |
|
"eval_runtime": 4.1805, |
|
"eval_samples_per_second": 239.445, |
|
"eval_steps_per_second": 15.07, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.205978803804891e-05, |
|
"loss": 5.5715, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.202962496440757e-05, |
|
"loss": 5.5698, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 5.603238582611084, |
|
"eval_runtime": 4.2335, |
|
"eval_samples_per_second": 236.45, |
|
"eval_steps_per_second": 14.881, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.199946189076624e-05, |
|
"loss": 5.5834, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.1969298817124905e-05, |
|
"loss": 5.5719, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 5.640628814697266, |
|
"eval_runtime": 4.2976, |
|
"eval_samples_per_second": 232.922, |
|
"eval_steps_per_second": 14.659, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.193913574348357e-05, |
|
"loss": 5.5841, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.190903299598952e-05, |
|
"loss": 5.5852, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 5.6316609382629395, |
|
"eval_runtime": 6.6588, |
|
"eval_samples_per_second": 150.328, |
|
"eval_steps_per_second": 9.461, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.187886992234819e-05, |
|
"loss": 5.5728, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.184870684870685e-05, |
|
"loss": 5.5794, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 5.637535095214844, |
|
"eval_runtime": 4.2729, |
|
"eval_samples_per_second": 234.266, |
|
"eval_steps_per_second": 14.744, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1818543775065516e-05, |
|
"loss": 5.5692, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1788441027571465e-05, |
|
"loss": 5.5912, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 5.601639747619629, |
|
"eval_runtime": 4.2748, |
|
"eval_samples_per_second": 234.165, |
|
"eval_steps_per_second": 14.738, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.175833828007741e-05, |
|
"loss": 5.577, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1728175206436075e-05, |
|
"loss": 5.5698, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 5.601426124572754, |
|
"eval_runtime": 4.2879, |
|
"eval_samples_per_second": 233.449, |
|
"eval_steps_per_second": 14.693, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.169801213279474e-05, |
|
"loss": 5.5745, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.166784905915341e-05, |
|
"loss": 5.5658, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 5.584649562835693, |
|
"eval_runtime": 4.2401, |
|
"eval_samples_per_second": 236.077, |
|
"eval_steps_per_second": 14.858, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.163774631165936e-05, |
|
"loss": 5.5632, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1607583238018026e-05, |
|
"loss": 5.5701, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 5.594232082366943, |
|
"eval_runtime": 6.4608, |
|
"eval_samples_per_second": 154.934, |
|
"eval_steps_per_second": 9.751, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1577420164376686e-05, |
|
"loss": 5.5869, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.154725709073535e-05, |
|
"loss": 5.5681, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 5.6190314292907715, |
|
"eval_runtime": 4.2645, |
|
"eval_samples_per_second": 234.731, |
|
"eval_steps_per_second": 14.773, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.151709401709402e-05, |
|
"loss": 5.5742, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.148693094345269e-05, |
|
"loss": 5.5584, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 5.601367950439453, |
|
"eval_runtime": 6.1261, |
|
"eval_samples_per_second": 163.4, |
|
"eval_steps_per_second": 10.284, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.145682819595863e-05, |
|
"loss": 5.5774, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.14266651223173e-05, |
|
"loss": 5.5788, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 5.602400302886963, |
|
"eval_runtime": 4.2475, |
|
"eval_samples_per_second": 235.665, |
|
"eval_steps_per_second": 14.832, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1396502048675963e-05, |
|
"loss": 5.5696, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.136633897503463e-05, |
|
"loss": 5.5806, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 5.596291542053223, |
|
"eval_runtime": 4.3852, |
|
"eval_samples_per_second": 228.268, |
|
"eval_steps_per_second": 14.366, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.13361759013933e-05, |
|
"loss": 5.5689, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.130601282775196e-05, |
|
"loss": 5.555, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 5.634425163269043, |
|
"eval_runtime": 4.2821, |
|
"eval_samples_per_second": 233.764, |
|
"eval_steps_per_second": 14.712, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1275849754110625e-05, |
|
"loss": 5.5582, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.124568668046929e-05, |
|
"loss": 5.5638, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 5.537110328674316, |
|
"eval_runtime": 4.2827, |
|
"eval_samples_per_second": 233.729, |
|
"eval_steps_per_second": 14.71, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.121558393297524e-05, |
|
"loss": 5.559, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.11854208593339e-05, |
|
"loss": 5.5625, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 5.598823070526123, |
|
"eval_runtime": 4.1935, |
|
"eval_samples_per_second": 238.704, |
|
"eval_steps_per_second": 15.023, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.115525778569257e-05, |
|
"loss": 5.5682, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.1125094712051235e-05, |
|
"loss": 5.5701, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 5.606781005859375, |
|
"eval_runtime": 4.1581, |
|
"eval_samples_per_second": 240.735, |
|
"eval_steps_per_second": 15.151, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.1094991964557185e-05, |
|
"loss": 5.5559, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.1064889217063134e-05, |
|
"loss": 5.5611, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 5.584975242614746, |
|
"eval_runtime": 4.1497, |
|
"eval_samples_per_second": 241.223, |
|
"eval_steps_per_second": 15.182, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.10347261434218e-05, |
|
"loss": 5.5758, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.100456306978046e-05, |
|
"loss": 5.5857, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 5.614663600921631, |
|
"eval_runtime": 4.0061, |
|
"eval_samples_per_second": 249.869, |
|
"eval_steps_per_second": 15.726, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.097439999613913e-05, |
|
"loss": 5.5646, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.0944236922497795e-05, |
|
"loss": 5.5609, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 5.600166320800781, |
|
"eval_runtime": 3.7879, |
|
"eval_samples_per_second": 264.264, |
|
"eval_steps_per_second": 16.632, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.0914073848856456e-05, |
|
"loss": 5.5657, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.088391077521512e-05, |
|
"loss": 5.5646, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 5.617602825164795, |
|
"eval_runtime": 3.9864, |
|
"eval_samples_per_second": 251.102, |
|
"eval_steps_per_second": 15.804, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.085374770157379e-05, |
|
"loss": 5.567, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.082364495407974e-05, |
|
"loss": 5.5691, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 5.588697910308838, |
|
"eval_runtime": 4.0872, |
|
"eval_samples_per_second": 244.913, |
|
"eval_steps_per_second": 15.414, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.07934818804384e-05, |
|
"loss": 5.5731, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.076331880679707e-05, |
|
"loss": 5.5518, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 5.6083807945251465, |
|
"eval_runtime": 4.1744, |
|
"eval_samples_per_second": 239.797, |
|
"eval_steps_per_second": 15.092, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.073315573315573e-05, |
|
"loss": 5.5795, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.070305298566168e-05, |
|
"loss": 5.5704, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 5.620527744293213, |
|
"eval_runtime": 4.2147, |
|
"eval_samples_per_second": 237.505, |
|
"eval_steps_per_second": 14.948, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.067295023816763e-05, |
|
"loss": 5.5635, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.06427871645263e-05, |
|
"loss": 5.5752, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 5.60969877243042, |
|
"eval_runtime": 4.206, |
|
"eval_samples_per_second": 237.991, |
|
"eval_steps_per_second": 14.978, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.061262409088496e-05, |
|
"loss": 5.5772, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.058246101724363e-05, |
|
"loss": 5.5723, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 5.585223197937012, |
|
"eval_runtime": 4.1426, |
|
"eval_samples_per_second": 241.634, |
|
"eval_steps_per_second": 15.208, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.055229794360229e-05, |
|
"loss": 5.5594, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.052213486996096e-05, |
|
"loss": 5.5613, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 5.594601631164551, |
|
"eval_runtime": 4.029, |
|
"eval_samples_per_second": 248.45, |
|
"eval_steps_per_second": 15.637, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0491971796319624e-05, |
|
"loss": 5.5452, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0461808722678287e-05, |
|
"loss": 5.5723, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 5.622194290161133, |
|
"eval_runtime": 4.1571, |
|
"eval_samples_per_second": 240.791, |
|
"eval_steps_per_second": 15.155, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0431705975184237e-05, |
|
"loss": 5.5765, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.04015429015429e-05, |
|
"loss": 5.5653, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 5.609323024749756, |
|
"eval_runtime": 4.2445, |
|
"eval_samples_per_second": 235.834, |
|
"eval_steps_per_second": 14.843, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.037137982790157e-05, |
|
"loss": 5.56, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.034121675426023e-05, |
|
"loss": 5.5604, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 5.608053207397461, |
|
"eval_runtime": 4.2784, |
|
"eval_samples_per_second": 233.968, |
|
"eval_steps_per_second": 14.725, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.03110536806189e-05, |
|
"loss": 5.5698, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0280950933124848e-05, |
|
"loss": 5.5602, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 5.6086883544921875, |
|
"eval_runtime": 4.2993, |
|
"eval_samples_per_second": 232.828, |
|
"eval_steps_per_second": 14.654, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0250787859483515e-05, |
|
"loss": 5.5624, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0220624785842178e-05, |
|
"loss": 5.577, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 5.620945453643799, |
|
"eval_runtime": 4.2859, |
|
"eval_samples_per_second": 233.559, |
|
"eval_steps_per_second": 14.699, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0190461712200845e-05, |
|
"loss": 5.5651, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0160419290854074e-05, |
|
"loss": 5.56, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 5.617950439453125, |
|
"eval_runtime": 4.181, |
|
"eval_samples_per_second": 239.419, |
|
"eval_steps_per_second": 15.068, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0130256217212737e-05, |
|
"loss": 5.5633, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0100093143571408e-05, |
|
"loss": 5.567, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 5.581989288330078, |
|
"eval_runtime": 4.1844, |
|
"eval_samples_per_second": 239.221, |
|
"eval_steps_per_second": 15.056, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.0069930069930068e-05, |
|
"loss": 5.5607, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.003976699628874e-05, |
|
"loss": 5.5698, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 5.626122951507568, |
|
"eval_runtime": 4.2366, |
|
"eval_samples_per_second": 236.277, |
|
"eval_steps_per_second": 14.871, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.00096039226474e-05, |
|
"loss": 5.5686, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.997944084900607e-05, |
|
"loss": 5.5682, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 5.587873458862305, |
|
"eval_runtime": 4.2898, |
|
"eval_samples_per_second": 233.342, |
|
"eval_steps_per_second": 14.686, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9949277775364732e-05, |
|
"loss": 5.5734, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.99191147017234e-05, |
|
"loss": 5.5789, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 5.579086780548096, |
|
"eval_runtime": 4.247, |
|
"eval_samples_per_second": 235.697, |
|
"eval_steps_per_second": 14.834, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9889011954229346e-05, |
|
"loss": 5.5631, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9858848880588013e-05, |
|
"loss": 5.5566, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 5.596020221710205, |
|
"eval_runtime": 3.9959, |
|
"eval_samples_per_second": 250.505, |
|
"eval_steps_per_second": 15.766, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9828685806946676e-05, |
|
"loss": 5.5698, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9798522733305346e-05, |
|
"loss": 5.566, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 5.61091947555542, |
|
"eval_runtime": 4.2033, |
|
"eval_samples_per_second": 238.144, |
|
"eval_steps_per_second": 14.988, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9768419985811293e-05, |
|
"loss": 5.5673, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.973825691216996e-05, |
|
"loss": 5.5628, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 5.610008239746094, |
|
"eval_runtime": 4.2081, |
|
"eval_samples_per_second": 237.875, |
|
"eval_steps_per_second": 14.971, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9708093838528623e-05, |
|
"loss": 5.5706, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.967793076488729e-05, |
|
"loss": 5.5726, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 5.600627422332764, |
|
"eval_runtime": 4.3279, |
|
"eval_samples_per_second": 231.29, |
|
"eval_steps_per_second": 14.557, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9647767691245954e-05, |
|
"loss": 5.5503, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.96176649437519e-05, |
|
"loss": 5.5895, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 5.598173141479492, |
|
"eval_runtime": 4.1467, |
|
"eval_samples_per_second": 241.398, |
|
"eval_steps_per_second": 15.193, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9587501870110567e-05, |
|
"loss": 5.5667, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9557399122616513e-05, |
|
"loss": 5.5607, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 5.594642162322998, |
|
"eval_runtime": 4.1122, |
|
"eval_samples_per_second": 243.425, |
|
"eval_steps_per_second": 15.32, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9527236048975183e-05, |
|
"loss": 5.5682, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9497072975333844e-05, |
|
"loss": 5.5516, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 5.622577667236328, |
|
"eval_runtime": 3.8072, |
|
"eval_samples_per_second": 262.92, |
|
"eval_steps_per_second": 16.547, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9466909901692514e-05, |
|
"loss": 5.5667, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9436746828051178e-05, |
|
"loss": 5.5565, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 5.5851006507873535, |
|
"eval_runtime": 3.8861, |
|
"eval_samples_per_second": 257.585, |
|
"eval_steps_per_second": 16.212, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9406583754409844e-05, |
|
"loss": 5.5713, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9376420680768508e-05, |
|
"loss": 5.5666, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 5.600591659545898, |
|
"eval_runtime": 5.223, |
|
"eval_samples_per_second": 191.654, |
|
"eval_steps_per_second": 12.062, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9346257607127175e-05, |
|
"loss": 5.552, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.931615485963312e-05, |
|
"loss": 5.5644, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 5.608797550201416, |
|
"eval_runtime": 4.1399, |
|
"eval_samples_per_second": 241.796, |
|
"eval_steps_per_second": 15.218, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.928599178599179e-05, |
|
"loss": 5.5612, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9255828712350452e-05, |
|
"loss": 5.5712, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 5.611100673675537, |
|
"eval_runtime": 4.0817, |
|
"eval_samples_per_second": 245.242, |
|
"eval_steps_per_second": 15.435, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9225665638709122e-05, |
|
"loss": 5.5511, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9195562891215068e-05, |
|
"loss": 5.5501, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 5.617428302764893, |
|
"eval_runtime": 4.2409, |
|
"eval_samples_per_second": 236.035, |
|
"eval_steps_per_second": 14.855, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.916539981757373e-05, |
|
"loss": 5.5677, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.91352367439324e-05, |
|
"loss": 5.5761, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 5.5843377113342285, |
|
"eval_runtime": 4.3583, |
|
"eval_samples_per_second": 229.679, |
|
"eval_steps_per_second": 14.455, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9105073670291062e-05, |
|
"loss": 5.5865, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.907491059664973e-05, |
|
"loss": 5.5604, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 5.591548919677734, |
|
"eval_runtime": 4.2343, |
|
"eval_samples_per_second": 236.401, |
|
"eval_steps_per_second": 14.878, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9044807849155675e-05, |
|
"loss": 5.5721, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9014644775514342e-05, |
|
"loss": 5.559, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 5.568837642669678, |
|
"eval_runtime": 4.2724, |
|
"eval_samples_per_second": 234.292, |
|
"eval_steps_per_second": 14.746, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8984481701873006e-05, |
|
"loss": 5.5726, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8954318628231676e-05, |
|
"loss": 5.5715, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 5.614943504333496, |
|
"eval_runtime": 4.2628, |
|
"eval_samples_per_second": 234.823, |
|
"eval_steps_per_second": 14.779, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8924276206884905e-05, |
|
"loss": 5.5576, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8894113133243572e-05, |
|
"loss": 5.5667, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 5.581839561462402, |
|
"eval_runtime": 4.319, |
|
"eval_samples_per_second": 231.765, |
|
"eval_steps_per_second": 14.587, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8863950059602236e-05, |
|
"loss": 5.5802, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.88337869859609e-05, |
|
"loss": 5.5684, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 5.611480236053467, |
|
"eval_runtime": 4.2644, |
|
"eval_samples_per_second": 234.732, |
|
"eval_steps_per_second": 14.773, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8803623912319566e-05, |
|
"loss": 5.5583, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.877346083867823e-05, |
|
"loss": 5.5526, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 5.582302570343018, |
|
"eval_runtime": 4.2931, |
|
"eval_samples_per_second": 233.165, |
|
"eval_steps_per_second": 14.675, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.874335809118418e-05, |
|
"loss": 5.5705, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8713195017542843e-05, |
|
"loss": 5.5763, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 5.567468643188477, |
|
"eval_runtime": 4.2538, |
|
"eval_samples_per_second": 235.319, |
|
"eval_steps_per_second": 14.81, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8683031943901513e-05, |
|
"loss": 5.5613, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8652868870260173e-05, |
|
"loss": 5.5607, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 5.62228536605835, |
|
"eval_runtime": 4.2752, |
|
"eval_samples_per_second": 234.142, |
|
"eval_steps_per_second": 14.736, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8622705796618844e-05, |
|
"loss": 5.5693, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.859260304912479e-05, |
|
"loss": 5.5688, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 5.592869281768799, |
|
"eval_runtime": 4.2324, |
|
"eval_samples_per_second": 236.51, |
|
"eval_steps_per_second": 14.885, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8562439975483457e-05, |
|
"loss": 5.5638, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.853227690184212e-05, |
|
"loss": 5.5509, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 5.573236465454102, |
|
"eval_runtime": 4.1603, |
|
"eval_samples_per_second": 240.606, |
|
"eval_steps_per_second": 15.143, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8502113828200787e-05, |
|
"loss": 5.5607, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.847195075455945e-05, |
|
"loss": 5.5653, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 5.6225433349609375, |
|
"eval_runtime": 4.2135, |
|
"eval_samples_per_second": 237.568, |
|
"eval_steps_per_second": 14.952, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8441787680918118e-05, |
|
"loss": 5.5625, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.841162460727678e-05, |
|
"loss": 5.5612, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 5.620114326477051, |
|
"eval_runtime": 4.2253, |
|
"eval_samples_per_second": 236.907, |
|
"eval_steps_per_second": 14.91, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8381461533635452e-05, |
|
"loss": 5.5458, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8351358786141398e-05, |
|
"loss": 5.5536, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 5.56666374206543, |
|
"eval_runtime": 4.1642, |
|
"eval_samples_per_second": 240.385, |
|
"eval_steps_per_second": 15.129, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8321195712500058e-05, |
|
"loss": 5.5679, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.829103263885873e-05, |
|
"loss": 5.5517, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 5.610234260559082, |
|
"eval_runtime": 4.1981, |
|
"eval_samples_per_second": 238.442, |
|
"eval_steps_per_second": 15.007, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.826086956521739e-05, |
|
"loss": 5.5577, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.823070649157606e-05, |
|
"loss": 5.5618, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 5.579962730407715, |
|
"eval_runtime": 4.2993, |
|
"eval_samples_per_second": 232.831, |
|
"eval_steps_per_second": 14.654, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8200603744082005e-05, |
|
"loss": 5.5503, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8170500996587955e-05, |
|
"loss": 5.5654, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 5.613447666168213, |
|
"eval_runtime": 4.2494, |
|
"eval_samples_per_second": 235.562, |
|
"eval_steps_per_second": 14.826, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.814033792294662e-05, |
|
"loss": 5.56, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.811017484930529e-05, |
|
"loss": 5.547, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 5.580785274505615, |
|
"eval_runtime": 4.2913, |
|
"eval_samples_per_second": 233.261, |
|
"eval_steps_per_second": 14.681, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.808001177566395e-05, |
|
"loss": 5.5518, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.804984870202262e-05, |
|
"loss": 5.5466, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 5.616759300231934, |
|
"eval_runtime": 4.2299, |
|
"eval_samples_per_second": 236.648, |
|
"eval_steps_per_second": 14.894, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8019685628381283e-05, |
|
"loss": 5.5547, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.798952255473995e-05, |
|
"loss": 5.5541, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 5.554247856140137, |
|
"eval_runtime": 4.201, |
|
"eval_samples_per_second": 238.274, |
|
"eval_steps_per_second": 14.996, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.7959359481098613e-05, |
|
"loss": 5.5566, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.792919640745728e-05, |
|
"loss": 5.5548, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 5.5636162757873535, |
|
"eval_runtime": 4.2679, |
|
"eval_samples_per_second": 234.542, |
|
"eval_steps_per_second": 14.761, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7899093659963227e-05, |
|
"loss": 5.5579, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.786893058632189e-05, |
|
"loss": 5.5524, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 5.584085941314697, |
|
"eval_runtime": 4.2363, |
|
"eval_samples_per_second": 236.294, |
|
"eval_steps_per_second": 14.872, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7838767512680557e-05, |
|
"loss": 5.5657, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.780860443903922e-05, |
|
"loss": 5.5628, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 5.601650238037109, |
|
"eval_runtime": 4.2707, |
|
"eval_samples_per_second": 234.387, |
|
"eval_steps_per_second": 14.752, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7778501691545174e-05, |
|
"loss": 5.5471, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7748338617903834e-05, |
|
"loss": 5.5603, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 5.596165657043457, |
|
"eval_runtime": 4.23, |
|
"eval_samples_per_second": 236.643, |
|
"eval_steps_per_second": 14.894, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7718175544262504e-05, |
|
"loss": 5.5684, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7688012470621168e-05, |
|
"loss": 5.5528, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 5.604942798614502, |
|
"eval_runtime": 4.1427, |
|
"eval_samples_per_second": 241.628, |
|
"eval_steps_per_second": 15.207, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7657909723127117e-05, |
|
"loss": 5.5509, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.762774664948578e-05, |
|
"loss": 5.5528, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 5.5896897315979, |
|
"eval_runtime": 4.1957, |
|
"eval_samples_per_second": 238.578, |
|
"eval_steps_per_second": 15.015, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.759764390199173e-05, |
|
"loss": 5.5553, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7567480828350394e-05, |
|
"loss": 5.5542, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 5.589568138122559, |
|
"eval_runtime": 4.162, |
|
"eval_samples_per_second": 240.512, |
|
"eval_steps_per_second": 15.137, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7537317754709058e-05, |
|
"loss": 5.5597, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7507154681067725e-05, |
|
"loss": 5.5624, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 5.575145244598389, |
|
"eval_runtime": 4.052, |
|
"eval_samples_per_second": 247.036, |
|
"eval_steps_per_second": 15.548, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.747705193357367e-05, |
|
"loss": 5.5584, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.744688885993234e-05, |
|
"loss": 5.5456, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 5.605469226837158, |
|
"eval_runtime": 4.0454, |
|
"eval_samples_per_second": 247.441, |
|
"eval_steps_per_second": 15.573, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7416725786291005e-05, |
|
"loss": 5.5504, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.738656271264967e-05, |
|
"loss": 5.5575, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 5.579885005950928, |
|
"eval_runtime": 4.0178, |
|
"eval_samples_per_second": 249.138, |
|
"eval_steps_per_second": 15.68, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7356399639008335e-05, |
|
"loss": 5.5487, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7326236565367002e-05, |
|
"loss": 5.5439, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 5.579631328582764, |
|
"eval_runtime": 5.2771, |
|
"eval_samples_per_second": 189.686, |
|
"eval_steps_per_second": 11.938, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.729613381787295e-05, |
|
"loss": 5.5649, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7265970744231615e-05, |
|
"loss": 5.5515, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 5.597219944000244, |
|
"eval_runtime": 4.0434, |
|
"eval_samples_per_second": 247.561, |
|
"eval_steps_per_second": 15.581, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.723580767059028e-05, |
|
"loss": 5.559, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.720564459694895e-05, |
|
"loss": 5.5702, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 5.597592353820801, |
|
"eval_runtime": 3.8939, |
|
"eval_samples_per_second": 257.067, |
|
"eval_steps_per_second": 16.179, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.717548152330761e-05, |
|
"loss": 5.561, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7145378775813562e-05, |
|
"loss": 5.5536, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 5.638396739959717, |
|
"eval_runtime": 4.0063, |
|
"eval_samples_per_second": 249.856, |
|
"eval_steps_per_second": 15.725, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7115215702172226e-05, |
|
"loss": 5.5627, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.708505262853089e-05, |
|
"loss": 5.555, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 5.568854808807373, |
|
"eval_runtime": 4.112, |
|
"eval_samples_per_second": 243.436, |
|
"eval_steps_per_second": 15.321, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7054889554889556e-05, |
|
"loss": 5.5682, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.702472648124822e-05, |
|
"loss": 5.5562, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 5.613949298858643, |
|
"eval_runtime": 4.1925, |
|
"eval_samples_per_second": 238.759, |
|
"eval_steps_per_second": 15.027, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6994563407606887e-05, |
|
"loss": 5.5564, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.696440033396555e-05, |
|
"loss": 5.5478, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 5.5822858810424805, |
|
"eval_runtime": 4.2141, |
|
"eval_samples_per_second": 237.533, |
|
"eval_steps_per_second": 14.95, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6934237260324218e-05, |
|
"loss": 5.5597, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6904134512830164e-05, |
|
"loss": 5.5682, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 5.5694403648376465, |
|
"eval_runtime": 4.2372, |
|
"eval_samples_per_second": 236.243, |
|
"eval_steps_per_second": 14.868, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6873971439188834e-05, |
|
"loss": 5.5571, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6843808365547494e-05, |
|
"loss": 5.56, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 5.621694564819336, |
|
"eval_runtime": 4.1243, |
|
"eval_samples_per_second": 242.707, |
|
"eval_steps_per_second": 15.275, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6813645291906165e-05, |
|
"loss": 5.5608, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6783482218264828e-05, |
|
"loss": 5.5455, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 5.554786205291748, |
|
"eval_runtime": 5.749, |
|
"eval_samples_per_second": 174.116, |
|
"eval_steps_per_second": 10.958, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6753319144623495e-05, |
|
"loss": 5.5584, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.672315607098216e-05, |
|
"loss": 5.5682, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 5.536126136779785, |
|
"eval_runtime": 4.2461, |
|
"eval_samples_per_second": 235.746, |
|
"eval_steps_per_second": 14.837, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6693053323488108e-05, |
|
"loss": 5.5461, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6662890249846772e-05, |
|
"loss": 5.5556, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 5.569164276123047, |
|
"eval_runtime": 4.2217, |
|
"eval_samples_per_second": 237.109, |
|
"eval_steps_per_second": 14.923, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6632727176205442e-05, |
|
"loss": 5.5503, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6602564102564102e-05, |
|
"loss": 5.561, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 5.604675769805908, |
|
"eval_runtime": 4.2406, |
|
"eval_samples_per_second": 236.052, |
|
"eval_steps_per_second": 14.856, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6572401028922773e-05, |
|
"loss": 5.5453, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.654229828142872e-05, |
|
"loss": 5.5681, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 5.55439567565918, |
|
"eval_runtime": 4.2063, |
|
"eval_samples_per_second": 237.977, |
|
"eval_steps_per_second": 14.978, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.651213520778738e-05, |
|
"loss": 5.5375, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.648197213414605e-05, |
|
"loss": 5.5561, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 5.5874738693237305, |
|
"eval_runtime": 4.1774, |
|
"eval_samples_per_second": 239.624, |
|
"eval_steps_per_second": 15.081, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6451809060504713e-05, |
|
"loss": 5.5464, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.642164598686338e-05, |
|
"loss": 5.5325, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 5.5877885818481445, |
|
"eval_runtime": 4.0293, |
|
"eval_samples_per_second": 248.429, |
|
"eval_steps_per_second": 15.635, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6391543239369326e-05, |
|
"loss": 5.5384, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6361380165727993e-05, |
|
"loss": 5.5592, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 5.5823540687561035, |
|
"eval_runtime": 3.8746, |
|
"eval_samples_per_second": 258.349, |
|
"eval_steps_per_second": 16.26, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6331217092086657e-05, |
|
"loss": 5.5406, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6301054018445327e-05, |
|
"loss": 5.554, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 5.59796667098999, |
|
"eval_runtime": 3.8288, |
|
"eval_samples_per_second": 261.442, |
|
"eval_steps_per_second": 16.454, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6270890944803987e-05, |
|
"loss": 5.5465, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6240727871162658e-05, |
|
"loss": 5.546, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 5.55270528793335, |
|
"eval_runtime": 4.0947, |
|
"eval_samples_per_second": 244.462, |
|
"eval_steps_per_second": 15.386, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6210625123668604e-05, |
|
"loss": 5.5609, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.618046205002727e-05, |
|
"loss": 5.5515, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 5.575203895568848, |
|
"eval_runtime": 4.2143, |
|
"eval_samples_per_second": 237.525, |
|
"eval_steps_per_second": 14.949, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6150298976385934e-05, |
|
"loss": 5.5391, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6120135902744598e-05, |
|
"loss": 5.5524, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 5.595301151275635, |
|
"eval_runtime": 4.0739, |
|
"eval_samples_per_second": 245.713, |
|
"eval_steps_per_second": 15.464, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6089972829103265e-05, |
|
"loss": 5.5546, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.605987008160921e-05, |
|
"loss": 5.5506, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 5.568810939788818, |
|
"eval_runtime": 4.1531, |
|
"eval_samples_per_second": 241.022, |
|
"eval_steps_per_second": 15.169, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6029707007967878e-05, |
|
"loss": 5.5422, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5999604260473824e-05, |
|
"loss": 5.5606, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 5.557425022125244, |
|
"eval_runtime": 4.2221, |
|
"eval_samples_per_second": 237.084, |
|
"eval_steps_per_second": 14.921, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5969441186832494e-05, |
|
"loss": 5.5471, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5939278113191158e-05, |
|
"loss": 5.5514, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 5.553510665893555, |
|
"eval_runtime": 4.1728, |
|
"eval_samples_per_second": 239.885, |
|
"eval_steps_per_second": 15.098, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5909115039549825e-05, |
|
"loss": 5.5371, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.587895196590849e-05, |
|
"loss": 5.5452, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 5.55861759185791, |
|
"eval_runtime": 4.2253, |
|
"eval_samples_per_second": 236.905, |
|
"eval_steps_per_second": 14.91, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5848788892267156e-05, |
|
"loss": 5.5338, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.581862581862582e-05, |
|
"loss": 5.5538, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 5.5573577880859375, |
|
"eval_runtime": 4.2195, |
|
"eval_samples_per_second": 237.233, |
|
"eval_steps_per_second": 14.931, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5788462744984486e-05, |
|
"loss": 5.5498, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5758359997490432e-05, |
|
"loss": 5.5545, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 5.518813133239746, |
|
"eval_runtime": 4.0734, |
|
"eval_samples_per_second": 245.741, |
|
"eval_steps_per_second": 15.466, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5728196923849103e-05, |
|
"loss": 5.5528, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5698033850207763e-05, |
|
"loss": 5.5465, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 5.5730109214782715, |
|
"eval_runtime": 4.2103, |
|
"eval_samples_per_second": 237.751, |
|
"eval_steps_per_second": 14.963, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5667870776566426e-05, |
|
"loss": 5.554, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5637707702925097e-05, |
|
"loss": 5.5516, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 5.568479061126709, |
|
"eval_runtime": 4.095, |
|
"eval_samples_per_second": 244.447, |
|
"eval_steps_per_second": 15.385, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5607604955431043e-05, |
|
"loss": 5.5377, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.557744188178971e-05, |
|
"loss": 5.5422, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 5.573971748352051, |
|
"eval_runtime": 4.0905, |
|
"eval_samples_per_second": 244.713, |
|
"eval_steps_per_second": 15.401, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5547278808148373e-05, |
|
"loss": 5.5486, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.551711573450704e-05, |
|
"loss": 5.5566, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 5.565399646759033, |
|
"eval_runtime": 4.2007, |
|
"eval_samples_per_second": 238.293, |
|
"eval_steps_per_second": 14.997, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5486952660865704e-05, |
|
"loss": 5.5534, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.545678958722437e-05, |
|
"loss": 5.5585, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 5.614578723907471, |
|
"eval_runtime": 4.1661, |
|
"eval_samples_per_second": 240.272, |
|
"eval_steps_per_second": 15.122, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5426686839730317e-05, |
|
"loss": 5.5461, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5396523766088987e-05, |
|
"loss": 5.5577, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 5.5891923904418945, |
|
"eval_runtime": 4.1531, |
|
"eval_samples_per_second": 241.024, |
|
"eval_steps_per_second": 15.169, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5366360692447648e-05, |
|
"loss": 5.5564, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5336197618806318e-05, |
|
"loss": 5.5332, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 5.594736099243164, |
|
"eval_runtime": 4.1179, |
|
"eval_samples_per_second": 243.084, |
|
"eval_steps_per_second": 15.299, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5306094871312264e-05, |
|
"loss": 5.562, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.527593179767093e-05, |
|
"loss": 5.5479, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 5.611057758331299, |
|
"eval_runtime": 3.9514, |
|
"eval_samples_per_second": 253.327, |
|
"eval_steps_per_second": 15.944, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5245768724029595e-05, |
|
"loss": 5.5425, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5215605650388258e-05, |
|
"loss": 5.5615, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 5.556435585021973, |
|
"eval_runtime": 3.947, |
|
"eval_samples_per_second": 253.608, |
|
"eval_steps_per_second": 15.961, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5185442576746925e-05, |
|
"loss": 5.5489, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.515533982925287e-05, |
|
"loss": 5.5524, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 5.55076265335083, |
|
"eval_runtime": 3.7148, |
|
"eval_samples_per_second": 269.464, |
|
"eval_steps_per_second": 16.959, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5125176755611542e-05, |
|
"loss": 5.5383, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5095013681970202e-05, |
|
"loss": 5.551, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 5.5648393630981445, |
|
"eval_runtime": 3.9, |
|
"eval_samples_per_second": 256.665, |
|
"eval_steps_per_second": 16.154, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5064850608328872e-05, |
|
"loss": 5.5427, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5034687534687536e-05, |
|
"loss": 5.5444, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 5.562320709228516, |
|
"eval_runtime": 4.0551, |
|
"eval_samples_per_second": 246.849, |
|
"eval_steps_per_second": 15.536, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5004584787193485e-05, |
|
"loss": 5.549, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.497442171355215e-05, |
|
"loss": 5.5571, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 5.529527187347412, |
|
"eval_runtime": 3.9424, |
|
"eval_samples_per_second": 253.906, |
|
"eval_steps_per_second": 15.98, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4944258639910813e-05, |
|
"loss": 5.537, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.491409556626948e-05, |
|
"loss": 5.5462, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 5.569790363311768, |
|
"eval_runtime": 4.0546, |
|
"eval_samples_per_second": 246.881, |
|
"eval_steps_per_second": 15.538, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.488399281877543e-05, |
|
"loss": 5.5532, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4853829745134093e-05, |
|
"loss": 5.5461, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 5.56859827041626, |
|
"eval_runtime": 4.2053, |
|
"eval_samples_per_second": 238.032, |
|
"eval_steps_per_second": 14.981, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.482366667149276e-05, |
|
"loss": 5.5524, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4793503597851427e-05, |
|
"loss": 5.5539, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 5.538581371307373, |
|
"eval_runtime": 4.1418, |
|
"eval_samples_per_second": 241.683, |
|
"eval_steps_per_second": 15.211, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.476334052421009e-05, |
|
"loss": 5.5466, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.473323777671604e-05, |
|
"loss": 5.5561, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 5.570122241973877, |
|
"eval_runtime": 4.1361, |
|
"eval_samples_per_second": 242.018, |
|
"eval_steps_per_second": 15.232, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4703074703074707e-05, |
|
"loss": 5.5666, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.467291162943337e-05, |
|
"loss": 5.554, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 5.601891040802002, |
|
"eval_runtime": 4.2243, |
|
"eval_samples_per_second": 236.962, |
|
"eval_steps_per_second": 14.914, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4642748555792037e-05, |
|
"loss": 5.5529, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4612645808297983e-05, |
|
"loss": 5.543, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 5.567346096038818, |
|
"eval_runtime": 4.1937, |
|
"eval_samples_per_second": 238.694, |
|
"eval_steps_per_second": 15.023, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4582482734656647e-05, |
|
"loss": 5.5413, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4552319661015314e-05, |
|
"loss": 5.5391, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 5.597777366638184, |
|
"eval_runtime": 4.1882, |
|
"eval_samples_per_second": 239.006, |
|
"eval_steps_per_second": 15.042, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4522156587373978e-05, |
|
"loss": 5.5391, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4491993513732644e-05, |
|
"loss": 5.5506, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 5.593500137329102, |
|
"eval_runtime": 4.1978, |
|
"eval_samples_per_second": 238.459, |
|
"eval_steps_per_second": 15.008, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.446183044009131e-05, |
|
"loss": 5.5521, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4431727692597258e-05, |
|
"loss": 5.5379, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 5.544180393218994, |
|
"eval_runtime": 4.1787, |
|
"eval_samples_per_second": 239.549, |
|
"eval_steps_per_second": 15.077, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4401564618955925e-05, |
|
"loss": 5.5495, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.437140154531459e-05, |
|
"loss": 5.5417, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 5.583528995513916, |
|
"eval_runtime": 4.1305, |
|
"eval_samples_per_second": 242.344, |
|
"eval_steps_per_second": 15.252, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4341238471673255e-05, |
|
"loss": 5.5366, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4311135724179205e-05, |
|
"loss": 5.5373, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 5.552159786224365, |
|
"eval_runtime": 4.2057, |
|
"eval_samples_per_second": 238.008, |
|
"eval_steps_per_second": 14.98, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4280972650537868e-05, |
|
"loss": 5.5466, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4250809576896535e-05, |
|
"loss": 5.5248, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 5.536990642547607, |
|
"eval_runtime": 4.0967, |
|
"eval_samples_per_second": 244.344, |
|
"eval_steps_per_second": 15.378, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4220646503255202e-05, |
|
"loss": 5.5544, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4190483429613866e-05, |
|
"loss": 5.5438, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 5.581920146942139, |
|
"eval_runtime": 4.05, |
|
"eval_samples_per_second": 247.158, |
|
"eval_steps_per_second": 15.555, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4160320355972533e-05, |
|
"loss": 5.5543, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.413021760847848e-05, |
|
"loss": 5.5448, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 5.557128429412842, |
|
"eval_runtime": 4.1843, |
|
"eval_samples_per_second": 239.226, |
|
"eval_steps_per_second": 15.056, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4100054534837142e-05, |
|
"loss": 5.536, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.406989146119581e-05, |
|
"loss": 5.5539, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 5.58180570602417, |
|
"eval_runtime": 4.1974, |
|
"eval_samples_per_second": 238.479, |
|
"eval_steps_per_second": 15.009, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4039728387554476e-05, |
|
"loss": 5.5363, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.400956531391314e-05, |
|
"loss": 5.5494, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 5.602090835571289, |
|
"eval_runtime": 4.0312, |
|
"eval_samples_per_second": 248.316, |
|
"eval_steps_per_second": 15.628, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.397946256641909e-05, |
|
"loss": 5.5326, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3949299492777753e-05, |
|
"loss": 5.5442, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 5.6263041496276855, |
|
"eval_runtime": 3.9955, |
|
"eval_samples_per_second": 250.529, |
|
"eval_steps_per_second": 15.768, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.391913641913642e-05, |
|
"loss": 5.5484, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3888973345495087e-05, |
|
"loss": 5.5355, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 5.561853885650635, |
|
"eval_runtime": 4.1484, |
|
"eval_samples_per_second": 241.296, |
|
"eval_steps_per_second": 15.186, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3858870598001033e-05, |
|
"loss": 5.5404, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.38287075243597e-05, |
|
"loss": 5.541, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 5.530201435089111, |
|
"eval_runtime": 4.1149, |
|
"eval_samples_per_second": 243.262, |
|
"eval_steps_per_second": 15.31, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3798544450718367e-05, |
|
"loss": 5.5463, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.376838137707703e-05, |
|
"loss": 5.5308, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 5.592388153076172, |
|
"eval_runtime": 4.1121, |
|
"eval_samples_per_second": 243.428, |
|
"eval_steps_per_second": 15.321, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3738218303435698e-05, |
|
"loss": 5.5466, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3708115555941644e-05, |
|
"loss": 5.5422, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 5.5666399002075195, |
|
"eval_runtime": 4.0817, |
|
"eval_samples_per_second": 245.244, |
|
"eval_steps_per_second": 15.435, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3677952482300307e-05, |
|
"loss": 5.5384, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3647789408658974e-05, |
|
"loss": 5.5429, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 5.560754299163818, |
|
"eval_runtime": 5.9793, |
|
"eval_samples_per_second": 167.411, |
|
"eval_steps_per_second": 10.536, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.361762633501764e-05, |
|
"loss": 5.5554, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3587463261376305e-05, |
|
"loss": 5.5484, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 5.554554462432861, |
|
"eval_runtime": 3.705, |
|
"eval_samples_per_second": 270.178, |
|
"eval_steps_per_second": 17.004, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3557300187734972e-05, |
|
"loss": 5.5452, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3527137114093635e-05, |
|
"loss": 5.5396, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 5.551665782928467, |
|
"eval_runtime": 3.5833, |
|
"eval_samples_per_second": 279.351, |
|
"eval_steps_per_second": 17.582, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3496974040452302e-05, |
|
"loss": 5.5407, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3466931619105535e-05, |
|
"loss": 5.5455, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 5.568324089050293, |
|
"eval_runtime": 3.9095, |
|
"eval_samples_per_second": 256.043, |
|
"eval_steps_per_second": 16.115, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3436768545464198e-05, |
|
"loss": 5.5508, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3406605471822865e-05, |
|
"loss": 5.5582, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 5.559935569763184, |
|
"eval_runtime": 3.8918, |
|
"eval_samples_per_second": 257.21, |
|
"eval_steps_per_second": 16.188, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3376442398181532e-05, |
|
"loss": 5.5465, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3346279324540196e-05, |
|
"loss": 5.5556, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 5.554677963256836, |
|
"eval_runtime": 3.9336, |
|
"eval_samples_per_second": 254.477, |
|
"eval_steps_per_second": 16.016, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3316116250898863e-05, |
|
"loss": 5.5417, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.328601350340481e-05, |
|
"loss": 5.5424, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 5.574638843536377, |
|
"eval_runtime": 4.029, |
|
"eval_samples_per_second": 248.45, |
|
"eval_steps_per_second": 15.637, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3255850429763472e-05, |
|
"loss": 5.5643, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.322568735612214e-05, |
|
"loss": 5.5361, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 5.589256286621094, |
|
"eval_runtime": 4.176, |
|
"eval_samples_per_second": 239.704, |
|
"eval_steps_per_second": 15.086, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3195524282480803e-05, |
|
"loss": 5.5349, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.316536120883947e-05, |
|
"loss": 5.5408, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 5.5889177322387695, |
|
"eval_runtime": 4.1178, |
|
"eval_samples_per_second": 243.092, |
|
"eval_steps_per_second": 15.299, |
|
"step": 445000 |
|
} |
|
], |
|
"max_steps": 828828, |
|
"num_train_epochs": 3, |
|
"total_flos": 5.4536747264337715e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|