|
{ |
|
"best_metric": 0.016336046159267426, |
|
"best_model_checkpoint": "/temp/t5_base-qg-ap-test/checkpoint-100", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 12.805366516113281, |
|
"eval_runtime": 0.2208, |
|
"eval_samples_per_second": 36.238, |
|
"eval_steps_per_second": 4.53, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 10.788012504577637, |
|
"eval_runtime": 0.2356, |
|
"eval_samples_per_second": 33.954, |
|
"eval_steps_per_second": 4.244, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 8.873129844665527, |
|
"eval_runtime": 0.2362, |
|
"eval_samples_per_second": 33.872, |
|
"eval_steps_per_second": 4.234, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 7.4068284034729, |
|
"eval_runtime": 0.2351, |
|
"eval_samples_per_second": 34.03, |
|
"eval_steps_per_second": 4.254, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 6.458061218261719, |
|
"eval_runtime": 0.2334, |
|
"eval_samples_per_second": 34.275, |
|
"eval_steps_per_second": 4.284, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 5.647502422332764, |
|
"eval_runtime": 0.2335, |
|
"eval_samples_per_second": 34.259, |
|
"eval_steps_per_second": 4.282, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 4.959558486938477, |
|
"eval_runtime": 0.2256, |
|
"eval_samples_per_second": 35.464, |
|
"eval_steps_per_second": 4.433, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 4.50578498840332, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.916, |
|
"eval_steps_per_second": 4.239, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 4.076832294464111, |
|
"eval_runtime": 0.2354, |
|
"eval_samples_per_second": 33.984, |
|
"eval_steps_per_second": 4.248, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.704714059829712, |
|
"eval_runtime": 0.2353, |
|
"eval_samples_per_second": 33.999, |
|
"eval_steps_per_second": 4.25, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 3.4143009185791016, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.914, |
|
"eval_steps_per_second": 4.239, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 3.1360208988189697, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.913, |
|
"eval_steps_per_second": 4.239, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.8865740299224854, |
|
"eval_runtime": 0.2356, |
|
"eval_samples_per_second": 33.955, |
|
"eval_steps_per_second": 4.244, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.6324548721313477, |
|
"eval_runtime": 0.234, |
|
"eval_samples_per_second": 34.185, |
|
"eval_steps_per_second": 4.273, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.388902187347412, |
|
"eval_runtime": 0.2365, |
|
"eval_samples_per_second": 33.826, |
|
"eval_steps_per_second": 4.228, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.1914408206939697, |
|
"eval_runtime": 0.2324, |
|
"eval_samples_per_second": 34.427, |
|
"eval_steps_per_second": 4.303, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.0423526763916016, |
|
"eval_runtime": 0.2363, |
|
"eval_samples_per_second": 33.85, |
|
"eval_steps_per_second": 4.231, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.9110742807388306, |
|
"eval_runtime": 0.2247, |
|
"eval_samples_per_second": 35.611, |
|
"eval_steps_per_second": 4.451, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.7762634754180908, |
|
"eval_runtime": 0.2365, |
|
"eval_samples_per_second": 33.825, |
|
"eval_steps_per_second": 4.228, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.6505231857299805, |
|
"eval_runtime": 0.2253, |
|
"eval_samples_per_second": 35.504, |
|
"eval_steps_per_second": 4.438, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.525721788406372, |
|
"eval_runtime": 0.2362, |
|
"eval_samples_per_second": 33.872, |
|
"eval_steps_per_second": 4.234, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.4125868082046509, |
|
"eval_runtime": 0.235, |
|
"eval_samples_per_second": 34.046, |
|
"eval_steps_per_second": 4.256, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.3109110593795776, |
|
"eval_runtime": 0.2369, |
|
"eval_samples_per_second": 33.769, |
|
"eval_steps_per_second": 4.221, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.2188917398452759, |
|
"eval_runtime": 0.2319, |
|
"eval_samples_per_second": 34.5, |
|
"eval_steps_per_second": 4.312, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.1338324546813965, |
|
"eval_runtime": 0.2362, |
|
"eval_samples_per_second": 33.873, |
|
"eval_steps_per_second": 4.234, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.0485577583312988, |
|
"eval_runtime": 0.2311, |
|
"eval_samples_per_second": 34.62, |
|
"eval_steps_per_second": 4.328, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.9640414118766785, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.914, |
|
"eval_steps_per_second": 4.239, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.8827559947967529, |
|
"eval_runtime": 0.2268, |
|
"eval_samples_per_second": 35.271, |
|
"eval_steps_per_second": 4.409, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.8060356378555298, |
|
"eval_runtime": 0.2364, |
|
"eval_samples_per_second": 33.848, |
|
"eval_steps_per_second": 4.231, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.7329221367835999, |
|
"eval_runtime": 0.2336, |
|
"eval_samples_per_second": 34.244, |
|
"eval_steps_per_second": 4.281, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 0.6638815402984619, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.912, |
|
"eval_steps_per_second": 4.239, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.6010197997093201, |
|
"eval_runtime": 0.235, |
|
"eval_samples_per_second": 34.042, |
|
"eval_steps_per_second": 4.255, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 0.5438850522041321, |
|
"eval_runtime": 0.2331, |
|
"eval_samples_per_second": 34.324, |
|
"eval_steps_per_second": 4.29, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.4924549162387848, |
|
"eval_runtime": 0.2353, |
|
"eval_samples_per_second": 34.004, |
|
"eval_steps_per_second": 4.25, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 0.4471151828765869, |
|
"eval_runtime": 0.2413, |
|
"eval_samples_per_second": 33.158, |
|
"eval_steps_per_second": 4.145, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.4065961539745331, |
|
"eval_runtime": 0.2355, |
|
"eval_samples_per_second": 33.966, |
|
"eval_steps_per_second": 4.246, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.3690074682235718, |
|
"eval_runtime": 0.2363, |
|
"eval_samples_per_second": 33.853, |
|
"eval_steps_per_second": 4.232, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.3340989649295807, |
|
"eval_runtime": 0.2363, |
|
"eval_samples_per_second": 33.86, |
|
"eval_steps_per_second": 4.232, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 0.3023061454296112, |
|
"eval_runtime": 0.2338, |
|
"eval_samples_per_second": 34.218, |
|
"eval_steps_per_second": 4.277, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.27456292510032654, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.907, |
|
"eval_steps_per_second": 4.238, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 0.24695347249507904, |
|
"eval_runtime": 0.2296, |
|
"eval_samples_per_second": 34.843, |
|
"eval_steps_per_second": 4.355, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 0.220541313290596, |
|
"eval_runtime": 0.2354, |
|
"eval_samples_per_second": 33.99, |
|
"eval_steps_per_second": 4.249, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 0.19677509367465973, |
|
"eval_runtime": 0.2327, |
|
"eval_samples_per_second": 34.379, |
|
"eval_steps_per_second": 4.297, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.17713746428489685, |
|
"eval_runtime": 0.2348, |
|
"eval_samples_per_second": 34.066, |
|
"eval_steps_per_second": 4.258, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 0.15933585166931152, |
|
"eval_runtime": 0.2234, |
|
"eval_samples_per_second": 35.814, |
|
"eval_steps_per_second": 4.477, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.14242056012153625, |
|
"eval_runtime": 0.2366, |
|
"eval_samples_per_second": 33.807, |
|
"eval_steps_per_second": 4.226, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 0.1287701427936554, |
|
"eval_runtime": 0.2313, |
|
"eval_samples_per_second": 34.594, |
|
"eval_steps_per_second": 4.324, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.11695855855941772, |
|
"eval_runtime": 0.2269, |
|
"eval_samples_per_second": 35.251, |
|
"eval_steps_per_second": 4.406, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 0.1070137694478035, |
|
"eval_runtime": 0.2278, |
|
"eval_samples_per_second": 35.111, |
|
"eval_steps_per_second": 4.389, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.09962165355682373, |
|
"eval_runtime": 0.2347, |
|
"eval_samples_per_second": 34.085, |
|
"eval_steps_per_second": 4.261, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 0.09394610673189163, |
|
"eval_runtime": 0.2348, |
|
"eval_samples_per_second": 34.077, |
|
"eval_steps_per_second": 4.26, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 0.08877500891685486, |
|
"eval_runtime": 0.2326, |
|
"eval_samples_per_second": 34.394, |
|
"eval_steps_per_second": 4.299, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 0.08450286090373993, |
|
"eval_runtime": 0.2348, |
|
"eval_samples_per_second": 34.07, |
|
"eval_steps_per_second": 4.259, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 0.0817728266119957, |
|
"eval_runtime": 0.2313, |
|
"eval_samples_per_second": 34.588, |
|
"eval_steps_per_second": 4.324, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 0.07895343005657196, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.915, |
|
"eval_steps_per_second": 4.239, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 0.07630708068609238, |
|
"eval_runtime": 0.2283, |
|
"eval_samples_per_second": 35.038, |
|
"eval_steps_per_second": 4.38, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 0.0731731578707695, |
|
"eval_runtime": 0.2364, |
|
"eval_samples_per_second": 33.835, |
|
"eval_steps_per_second": 4.229, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 0.06972303986549377, |
|
"eval_runtime": 0.2275, |
|
"eval_samples_per_second": 35.171, |
|
"eval_steps_per_second": 4.396, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 0.06655264645814896, |
|
"eval_runtime": 0.2357, |
|
"eval_samples_per_second": 33.941, |
|
"eval_steps_per_second": 4.243, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 0.06421676278114319, |
|
"eval_runtime": 0.2353, |
|
"eval_samples_per_second": 34.001, |
|
"eval_steps_per_second": 4.25, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 0.06110429763793945, |
|
"eval_runtime": 0.2361, |
|
"eval_samples_per_second": 33.886, |
|
"eval_steps_per_second": 4.236, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 0.05834279954433441, |
|
"eval_runtime": 0.2379, |
|
"eval_samples_per_second": 33.624, |
|
"eval_steps_per_second": 4.203, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 0.055961962789297104, |
|
"eval_runtime": 0.2264, |
|
"eval_samples_per_second": 35.335, |
|
"eval_steps_per_second": 4.417, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 0.05323232710361481, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.916, |
|
"eval_steps_per_second": 4.24, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 0.051185671240091324, |
|
"eval_runtime": 0.2338, |
|
"eval_samples_per_second": 34.213, |
|
"eval_steps_per_second": 4.277, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 0.04865783825516701, |
|
"eval_runtime": 0.2358, |
|
"eval_samples_per_second": 33.928, |
|
"eval_steps_per_second": 4.241, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 0.04639100283384323, |
|
"eval_runtime": 0.2281, |
|
"eval_samples_per_second": 35.066, |
|
"eval_steps_per_second": 4.383, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 0.04309353977441788, |
|
"eval_runtime": 0.2365, |
|
"eval_samples_per_second": 33.827, |
|
"eval_steps_per_second": 4.228, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 0.03992551565170288, |
|
"eval_runtime": 0.2324, |
|
"eval_samples_per_second": 34.422, |
|
"eval_steps_per_second": 4.303, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 0.03812782093882561, |
|
"eval_runtime": 0.2236, |
|
"eval_samples_per_second": 35.783, |
|
"eval_steps_per_second": 4.473, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 0.03636465594172478, |
|
"eval_runtime": 0.2325, |
|
"eval_samples_per_second": 34.401, |
|
"eval_steps_per_second": 4.3, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 0.034834641963243484, |
|
"eval_runtime": 0.2358, |
|
"eval_samples_per_second": 33.926, |
|
"eval_steps_per_second": 4.241, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 0.03329307958483696, |
|
"eval_runtime": 0.2345, |
|
"eval_samples_per_second": 34.111, |
|
"eval_steps_per_second": 4.264, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 0.031552691012620926, |
|
"eval_runtime": 0.2364, |
|
"eval_samples_per_second": 33.845, |
|
"eval_steps_per_second": 4.231, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 0.029882650822401047, |
|
"eval_runtime": 0.2329, |
|
"eval_samples_per_second": 34.351, |
|
"eval_steps_per_second": 4.294, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 0.028516214340925217, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.907, |
|
"eval_steps_per_second": 4.238, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 0.027370158582925797, |
|
"eval_runtime": 0.2246, |
|
"eval_samples_per_second": 35.624, |
|
"eval_steps_per_second": 4.453, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 0.026426443830132484, |
|
"eval_runtime": 0.2254, |
|
"eval_samples_per_second": 35.492, |
|
"eval_steps_per_second": 4.436, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 0.02534804865717888, |
|
"eval_runtime": 0.2295, |
|
"eval_samples_per_second": 34.856, |
|
"eval_steps_per_second": 4.357, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 0.024182336404919624, |
|
"eval_runtime": 0.2363, |
|
"eval_samples_per_second": 33.856, |
|
"eval_steps_per_second": 4.232, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 0.023593546822667122, |
|
"eval_runtime": 0.2361, |
|
"eval_samples_per_second": 33.881, |
|
"eval_steps_per_second": 4.235, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 0.023052040487527847, |
|
"eval_runtime": 0.2361, |
|
"eval_samples_per_second": 33.879, |
|
"eval_steps_per_second": 4.235, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 0.02290300466120243, |
|
"eval_runtime": 0.2361, |
|
"eval_samples_per_second": 33.884, |
|
"eval_steps_per_second": 4.236, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 0.02261677198112011, |
|
"eval_runtime": 0.2286, |
|
"eval_samples_per_second": 35.002, |
|
"eval_steps_per_second": 4.375, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 0.022289568558335304, |
|
"eval_runtime": 0.2353, |
|
"eval_samples_per_second": 34.003, |
|
"eval_steps_per_second": 4.25, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 0.02184910513460636, |
|
"eval_runtime": 0.2287, |
|
"eval_samples_per_second": 34.986, |
|
"eval_steps_per_second": 4.373, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 0.021228935569524765, |
|
"eval_runtime": 0.2349, |
|
"eval_samples_per_second": 34.051, |
|
"eval_steps_per_second": 4.256, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 0.020517783239483833, |
|
"eval_runtime": 0.2353, |
|
"eval_samples_per_second": 34.002, |
|
"eval_steps_per_second": 4.25, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 0.019832810387015343, |
|
"eval_runtime": 0.2232, |
|
"eval_samples_per_second": 35.839, |
|
"eval_steps_per_second": 4.48, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 0.0191506277769804, |
|
"eval_runtime": 0.2312, |
|
"eval_samples_per_second": 34.605, |
|
"eval_steps_per_second": 4.326, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 0.018617864698171616, |
|
"eval_runtime": 0.2243, |
|
"eval_samples_per_second": 35.667, |
|
"eval_steps_per_second": 4.458, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 0.01811818592250347, |
|
"eval_runtime": 0.2348, |
|
"eval_samples_per_second": 34.07, |
|
"eval_steps_per_second": 4.259, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 0.01765601523220539, |
|
"eval_runtime": 0.2363, |
|
"eval_samples_per_second": 33.853, |
|
"eval_steps_per_second": 4.232, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 0.017278417944908142, |
|
"eval_runtime": 0.2264, |
|
"eval_samples_per_second": 35.331, |
|
"eval_steps_per_second": 4.416, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 0.016984442248940468, |
|
"eval_runtime": 0.2349, |
|
"eval_samples_per_second": 34.053, |
|
"eval_steps_per_second": 4.257, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 0.01675889454782009, |
|
"eval_runtime": 0.2343, |
|
"eval_samples_per_second": 34.141, |
|
"eval_steps_per_second": 4.268, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 0.016593070700764656, |
|
"eval_runtime": 0.2359, |
|
"eval_samples_per_second": 33.915, |
|
"eval_steps_per_second": 4.239, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 0.016466278582811356, |
|
"eval_runtime": 0.2296, |
|
"eval_samples_per_second": 34.851, |
|
"eval_steps_per_second": 4.356, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 0.016385838389396667, |
|
"eval_runtime": 0.2364, |
|
"eval_samples_per_second": 33.84, |
|
"eval_steps_per_second": 4.23, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.4009, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 0.016336046159267426, |
|
"eval_runtime": 0.2203, |
|
"eval_samples_per_second": 36.315, |
|
"eval_steps_per_second": 4.539, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 100, |
|
"total_flos": 487166312448000.0, |
|
"train_loss": 1.40092041015625, |
|
"train_runtime": 1064.353, |
|
"train_samples_per_second": 0.752, |
|
"train_steps_per_second": 0.094 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 100, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 487166312448000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|