|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.45714285714285713, |
|
"eval_steps": 500, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.1858323913126436e-05, |
|
"loss": 1.7001, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.371664782625287e-05, |
|
"loss": 1.6917, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.557497173937931e-05, |
|
"loss": 1.5124, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012743329565250574, |
|
"loss": 1.5366, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001592916195656322, |
|
"loss": 1.4751, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019114994347875863, |
|
"loss": 1.4775, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00022300826739188505, |
|
"loss": 1.3617, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002548665913050115, |
|
"loss": 1.3029, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028672491521813796, |
|
"loss": 1.2041, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003185832391312644, |
|
"loss": 1.2482, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003184228430698356, |
|
"loss": 1.1365, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00031794197790187094, |
|
"loss": 1.2314, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0003171416120258239, |
|
"loss": 1.0893, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0003160233572720541, |
|
"loss": 1.0185, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0003145894656568153, |
|
"loss": 1.1633, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00031284282484699455, |
|
"loss": 1.1775, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00031078695234473526, |
|
"loss": 1.0772, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00030842598840365604, |
|
"loss": 1.0825, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00030576468769093104, |
|
"loss": 1.1053, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0003028084097120226, |
|
"loss": 1.0627, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002995631080173512, |
|
"loss": 1.0097, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002960353182126366, |
|
"loss": 1.0523, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00029223214479705777, |
|
"loss": 1.0495, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002881612468557375, |
|
"loss": 0.9913, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028383082263536385, |
|
"loss": 0.9737, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00027924959303401233, |
|
"loss": 1.0118, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000274426784038418, |
|
"loss": 1.0696, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00026937210814406584, |
|
"loss": 1.0735, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00026409574479551763, |
|
"loss": 1.0232, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00025860831988636497, |
|
"loss": 1.0537, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00025292088436009396, |
|
"loss": 1.0636, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002470448919549556, |
|
"loss": 1.0604, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024099217613766146, |
|
"loss": 1.0282, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00023477492627235606, |
|
"loss": 1.0354, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00022840566307286026, |
|
"loss": 1.0877, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00022189721338761954, |
|
"loss": 0.9974, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00021526268436813839, |
|
"loss": 1.069, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002085154370729214, |
|
"loss": 1.0533, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020166905956007982, |
|
"loss": 1.0155, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019473733952279058, |
|
"loss": 0.9494, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00018773423652271724, |
|
"loss": 0.953, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00018067385387731056, |
|
"loss": 1.0139, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001735704102576041, |
|
"loss": 0.915, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00016643821105370306, |
|
"loss": 0.8843, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001592916195656322, |
|
"loss": 0.9219, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001521450280775614, |
|
"loss": 0.9749, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014501282887366027, |
|
"loss": 0.9495, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00013790938525395387, |
|
"loss": 1.0825, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00013084900260854716, |
|
"loss": 0.8, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001238458996084738, |
|
"loss": 1.036, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00011691417957118454, |
|
"loss": 0.9924, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00011006780205834297, |
|
"loss": 0.9575, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000103320554763126, |
|
"loss": 1.0124, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.668602574364485e-05, |
|
"loss": 1.0523, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.017757605840412e-05, |
|
"loss": 1.1276, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.380831285890832e-05, |
|
"loss": 0.9645, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.759106299360295e-05, |
|
"loss": 0.9707, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.153834717630876e-05, |
|
"loss": 1.0702, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.566235477117044e-05, |
|
"loss": 1.0682, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.9974919244899423e-05, |
|
"loss": 0.8695, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.448749433574675e-05, |
|
"loss": 0.953, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.921113098719853e-05, |
|
"loss": 0.9892, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4156455092846426e-05, |
|
"loss": 0.9447, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.933364609725209e-05, |
|
"loss": 0.9473, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4752416495900555e-05, |
|
"loss": 0.906, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.042199227552688e-05, |
|
"loss": 0.9664, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.635109433420662e-05, |
|
"loss": 1.0343, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.2547920918627827e-05, |
|
"loss": 0.96, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9020131113913137e-05, |
|
"loss": 0.8727, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.5774829419241735e-05, |
|
"loss": 1.071, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.281855144033336e-05, |
|
"loss": 0.8835, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.0157250727608305e-05, |
|
"loss": 0.9211, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.796286786529122e-06, |
|
"loss": 0.9842, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.740414284269837e-06, |
|
"loss": 1.0236, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.993773474449073e-06, |
|
"loss": 0.9883, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5598818592102812e-06, |
|
"loss": 0.9573, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.4416271054404854e-06, |
|
"loss": 0.9061, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.412612293934594e-07, |
|
"loss": 0.9517, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6039606142882264e-07, |
|
"loss": 0.9217, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0, |
|
"loss": 0.9262, |
|
"step": 80 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 80, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"total_flos": 1.1629371054302822e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|