|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 1000, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.2623244524002075, |
|
"learning_rate": 0.00019994965423831854, |
|
"loss": 1.4022, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.4888113737106323, |
|
"eval_runtime": 118.6929, |
|
"eval_samples_per_second": 53.415, |
|
"eval_steps_per_second": 13.354, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.0886244773864746, |
|
"learning_rate": 0.00019954719225730847, |
|
"loss": 1.39, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.4779928922653198, |
|
"eval_runtime": 119.2893, |
|
"eval_samples_per_second": 53.148, |
|
"eval_steps_per_second": 13.287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.0032905340194702, |
|
"learning_rate": 0.00019874388886763944, |
|
"loss": 1.3591, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.4647990465164185, |
|
"eval_runtime": 119.1474, |
|
"eval_samples_per_second": 53.211, |
|
"eval_steps_per_second": 13.303, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1680448055267334, |
|
"learning_rate": 0.00019754297868854073, |
|
"loss": 1.3373, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.4628264904022217, |
|
"eval_runtime": 118.5565, |
|
"eval_samples_per_second": 53.477, |
|
"eval_steps_per_second": 13.369, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.1541577577590942, |
|
"learning_rate": 0.00019594929736144976, |
|
"loss": 1.3124, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.4525669813156128, |
|
"eval_runtime": 119.2106, |
|
"eval_samples_per_second": 53.183, |
|
"eval_steps_per_second": 13.296, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.252061367034912, |
|
"learning_rate": 0.00019396926207859084, |
|
"loss": 1.3781, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.4199129343032837, |
|
"eval_runtime": 119.1684, |
|
"eval_samples_per_second": 53.202, |
|
"eval_steps_per_second": 13.301, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.1096590757369995, |
|
"learning_rate": 0.00019161084574320696, |
|
"loss": 1.4167, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.399798035621643, |
|
"eval_runtime": 117.7451, |
|
"eval_samples_per_second": 53.845, |
|
"eval_steps_per_second": 13.461, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.014760971069336, |
|
"learning_rate": 0.00018888354486549237, |
|
"loss": 1.4106, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.3875452280044556, |
|
"eval_runtime": 117.8417, |
|
"eval_samples_per_second": 53.801, |
|
"eval_steps_per_second": 13.45, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.991398274898529, |
|
"learning_rate": 0.00018579834132349772, |
|
"loss": 1.3985, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.3735474348068237, |
|
"eval_runtime": 118.1423, |
|
"eval_samples_per_second": 53.664, |
|
"eval_steps_per_second": 13.416, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.9979888796806335, |
|
"learning_rate": 0.0001823676581429833, |
|
"loss": 1.3924, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.3612616062164307, |
|
"eval_runtime": 119.0319, |
|
"eval_samples_per_second": 53.263, |
|
"eval_steps_per_second": 13.316, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1004694700241089, |
|
"learning_rate": 0.00017860530947427875, |
|
"loss": 1.3758, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.3603907823562622, |
|
"eval_runtime": 119.1776, |
|
"eval_samples_per_second": 53.198, |
|
"eval_steps_per_second": 13.299, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1753897666931152, |
|
"learning_rate": 0.0001745264449675755, |
|
"loss": 1.3609, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.3465052843093872, |
|
"eval_runtime": 118.067, |
|
"eval_samples_per_second": 53.698, |
|
"eval_steps_per_second": 13.425, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2619237899780273, |
|
"learning_rate": 0.00017014748877063214, |
|
"loss": 1.344, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.3324896097183228, |
|
"eval_runtime": 118.0485, |
|
"eval_samples_per_second": 53.707, |
|
"eval_steps_per_second": 13.427, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.9625117778778076, |
|
"learning_rate": 0.00016548607339452853, |
|
"loss": 1.3335, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.323933482170105, |
|
"eval_runtime": 117.505, |
|
"eval_samples_per_second": 53.955, |
|
"eval_steps_per_second": 13.489, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.3606470823287964, |
|
"learning_rate": 0.00016056096871376667, |
|
"loss": 1.3241, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.314150333404541, |
|
"eval_runtime": 117.8746, |
|
"eval_samples_per_second": 53.786, |
|
"eval_steps_per_second": 13.446, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.9183652400970459, |
|
"learning_rate": 0.00015539200638661104, |
|
"loss": 1.3029, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.3047397136688232, |
|
"eval_runtime": 119.2254, |
|
"eval_samples_per_second": 53.177, |
|
"eval_steps_per_second": 13.294, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.9226890206336975, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.3005, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.297969102859497, |
|
"eval_runtime": 119.8633, |
|
"eval_samples_per_second": 52.894, |
|
"eval_steps_per_second": 13.223, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.9322838187217712, |
|
"learning_rate": 0.00014440666126057744, |
|
"loss": 1.2951, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.2912102937698364, |
|
"eval_runtime": 119.9122, |
|
"eval_samples_per_second": 52.872, |
|
"eval_steps_per_second": 13.218, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.102362871170044, |
|
"learning_rate": 0.00013863451256931287, |
|
"loss": 1.313, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.282011866569519, |
|
"eval_runtime": 117.9359, |
|
"eval_samples_per_second": 53.758, |
|
"eval_steps_per_second": 13.44, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.0014485120773315, |
|
"learning_rate": 0.00013270679633174218, |
|
"loss": 1.2773, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.2778606414794922, |
|
"eval_runtime": 120.0646, |
|
"eval_samples_per_second": 52.805, |
|
"eval_steps_per_second": 13.201, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.8490633964538574, |
|
"learning_rate": 0.00012664738136900348, |
|
"loss": 1.2734, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.2667981386184692, |
|
"eval_runtime": 117.9405, |
|
"eval_samples_per_second": 53.756, |
|
"eval_steps_per_second": 13.439, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.9456785321235657, |
|
"learning_rate": 0.00012048066680651908, |
|
"loss": 1.2656, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.258193850517273, |
|
"eval_runtime": 121.0875, |
|
"eval_samples_per_second": 52.359, |
|
"eval_steps_per_second": 13.09, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1480209827423096, |
|
"learning_rate": 0.00011423148382732853, |
|
"loss": 1.2522, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.2538591623306274, |
|
"eval_runtime": 118.1025, |
|
"eval_samples_per_second": 53.682, |
|
"eval_steps_per_second": 13.421, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.8957574963569641, |
|
"learning_rate": 0.00010792499568567884, |
|
"loss": 1.2519, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.2467232942581177, |
|
"eval_runtime": 118.0714, |
|
"eval_samples_per_second": 53.696, |
|
"eval_steps_per_second": 13.424, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2852896451950073, |
|
"learning_rate": 0.00010158659638348081, |
|
"loss": 1.24, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.2399760484695435, |
|
"eval_runtime": 118.556, |
|
"eval_samples_per_second": 53.477, |
|
"eval_steps_per_second": 13.369, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.9100021719932556, |
|
"learning_rate": 9.524180841762577e-05, |
|
"loss": 1.2653, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.2347520589828491, |
|
"eval_runtime": 118.0581, |
|
"eval_samples_per_second": 53.702, |
|
"eval_steps_per_second": 13.426, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.3319469690322876, |
|
"learning_rate": 8.891618000989891e-05, |
|
"loss": 1.2313, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.2284266948699951, |
|
"eval_runtime": 118.5886, |
|
"eval_samples_per_second": 53.462, |
|
"eval_steps_per_second": 13.366, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.1901592016220093, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 1.2218, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.2233468294143677, |
|
"eval_runtime": 118.009, |
|
"eval_samples_per_second": 53.725, |
|
"eval_steps_per_second": 13.431, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.026314616203308, |
|
"learning_rate": 7.642410644905726e-05, |
|
"loss": 1.2275, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.2184290885925293, |
|
"eval_runtime": 119.1308, |
|
"eval_samples_per_second": 53.219, |
|
"eval_steps_per_second": 13.305, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.3237501382827759, |
|
"learning_rate": 7.030796246717255e-05, |
|
"loss": 1.2395, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.213285207748413, |
|
"eval_runtime": 118.8571, |
|
"eval_samples_per_second": 53.341, |
|
"eval_steps_per_second": 13.335, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1643166542053223, |
|
"learning_rate": 6.431137784081282e-05, |
|
"loss": 1.2064, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.2104227542877197, |
|
"eval_runtime": 120.4291, |
|
"eval_samples_per_second": 52.645, |
|
"eval_steps_per_second": 13.161, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.7978019118309021, |
|
"learning_rate": 5.845849869981137e-05, |
|
"loss": 1.2141, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2048578262329102, |
|
"eval_runtime": 119.224, |
|
"eval_samples_per_second": 53.177, |
|
"eval_steps_per_second": 13.294, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.1302543878555298, |
|
"learning_rate": 5.277289252273174e-05, |
|
"loss": 1.2054, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.2011561393737793, |
|
"eval_runtime": 118.3268, |
|
"eval_samples_per_second": 53.58, |
|
"eval_steps_per_second": 13.395, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.2881643772125244, |
|
"learning_rate": 4.727745323894976e-05, |
|
"loss": 1.2136, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.1976003646850586, |
|
"eval_runtime": 118.2934, |
|
"eval_samples_per_second": 53.596, |
|
"eval_steps_per_second": 13.399, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.8197779655456543, |
|
"learning_rate": 4.19943090428802e-05, |
|
"loss": 1.1883, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.1930813789367676, |
|
"eval_runtime": 117.6868, |
|
"eval_samples_per_second": 53.872, |
|
"eval_steps_per_second": 13.468, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.9713582992553711, |
|
"learning_rate": 3.694473329154778e-05, |
|
"loss": 1.2058, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.1900451183319092, |
|
"eval_runtime": 119.439, |
|
"eval_samples_per_second": 53.081, |
|
"eval_steps_per_second": 13.27, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.4913629293441772, |
|
"learning_rate": 3.21490588442868e-05, |
|
"loss": 1.1864, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.1863234043121338, |
|
"eval_runtime": 117.5219, |
|
"eval_samples_per_second": 53.947, |
|
"eval_steps_per_second": 13.487, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.2373270988464355, |
|
"learning_rate": 2.7626596189492983e-05, |
|
"loss": 1.1854, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1844661235809326, |
|
"eval_runtime": 117.3667, |
|
"eval_samples_per_second": 54.019, |
|
"eval_steps_per_second": 13.505, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1346243619918823, |
|
"learning_rate": 2.339555568810221e-05, |
|
"loss": 1.1954, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.1816102266311646, |
|
"eval_runtime": 118.4828, |
|
"eval_samples_per_second": 53.51, |
|
"eval_steps_per_second": 13.377, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.3110140562057495, |
|
"learning_rate": 1.947297424689414e-05, |
|
"loss": 1.1663, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.178844690322876, |
|
"eval_runtime": 118.8225, |
|
"eval_samples_per_second": 53.357, |
|
"eval_steps_per_second": 13.339, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.2288557291030884, |
|
"learning_rate": 1.587464671688187e-05, |
|
"loss": 1.1912, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.177311897277832, |
|
"eval_runtime": 119.099, |
|
"eval_samples_per_second": 53.233, |
|
"eval_steps_per_second": 13.308, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0926482677459717, |
|
"learning_rate": 1.2615062293021507e-05, |
|
"loss": 1.1855, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.1756287813186646, |
|
"eval_runtime": 118.243, |
|
"eval_samples_per_second": 53.618, |
|
"eval_steps_per_second": 13.405, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0959358215332031, |
|
"learning_rate": 9.707346171337894e-06, |
|
"loss": 1.1773, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.1744451522827148, |
|
"eval_runtime": 118.507, |
|
"eval_samples_per_second": 53.499, |
|
"eval_steps_per_second": 13.375, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.8658304810523987, |
|
"learning_rate": 7.163206698392744e-06, |
|
"loss": 1.1874, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.1730809211730957, |
|
"eval_runtime": 118.6435, |
|
"eval_samples_per_second": 53.437, |
|
"eval_steps_per_second": 13.359, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.18503737449646, |
|
"learning_rate": 4.992888225905468e-06, |
|
"loss": 1.1679, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.171962857246399, |
|
"eval_runtime": 119.251, |
|
"eval_samples_per_second": 53.165, |
|
"eval_steps_per_second": 13.291, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.209384560585022, |
|
"learning_rate": 3.2051298603643753e-06, |
|
"loss": 1.1776, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.1715712547302246, |
|
"eval_runtime": 120.3589, |
|
"eval_samples_per_second": 52.676, |
|
"eval_steps_per_second": 13.169, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.157520055770874, |
|
"learning_rate": 1.8071302737293295e-06, |
|
"loss": 1.1708, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.1712528467178345, |
|
"eval_runtime": 118.7182, |
|
"eval_samples_per_second": 53.404, |
|
"eval_steps_per_second": 13.351, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0842524766921997, |
|
"learning_rate": 8.04518716920466e-07, |
|
"loss": 1.2052, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.170965313911438, |
|
"eval_runtime": 118.8637, |
|
"eval_samples_per_second": 53.338, |
|
"eval_steps_per_second": 13.335, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.1883560419082642, |
|
"learning_rate": 2.0133235281156736e-07, |
|
"loss": 1.168, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.1708762645721436, |
|
"eval_runtime": 118.2509, |
|
"eval_samples_per_second": 53.615, |
|
"eval_steps_per_second": 13.404, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.4387354850769043, |
|
"learning_rate": 0.0, |
|
"loss": 1.1727, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.1708616018295288, |
|
"eval_runtime": 118.0864, |
|
"eval_samples_per_second": 53.689, |
|
"eval_steps_per_second": 13.422, |
|
"step": 50000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 50000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.132424105984e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|