hedronstone's picture
Upload 12 files
88d80f9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.45714285714285713,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.1858323913126436e-05,
"loss": 1.7001,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 6.371664782625287e-05,
"loss": 1.6917,
"step": 2
},
{
"epoch": 0.02,
"learning_rate": 9.557497173937931e-05,
"loss": 1.5124,
"step": 3
},
{
"epoch": 0.02,
"learning_rate": 0.00012743329565250574,
"loss": 1.5366,
"step": 4
},
{
"epoch": 0.03,
"learning_rate": 0.0001592916195656322,
"loss": 1.4751,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 0.00019114994347875863,
"loss": 1.4775,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 0.00022300826739188505,
"loss": 1.3617,
"step": 7
},
{
"epoch": 0.05,
"learning_rate": 0.0002548665913050115,
"loss": 1.3029,
"step": 8
},
{
"epoch": 0.05,
"learning_rate": 0.00028672491521813796,
"loss": 1.2041,
"step": 9
},
{
"epoch": 0.06,
"learning_rate": 0.0003185832391312644,
"loss": 1.2482,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 0.0003184228430698356,
"loss": 1.1365,
"step": 11
},
{
"epoch": 0.07,
"learning_rate": 0.00031794197790187094,
"loss": 1.2314,
"step": 12
},
{
"epoch": 0.07,
"learning_rate": 0.0003171416120258239,
"loss": 1.0893,
"step": 13
},
{
"epoch": 0.08,
"learning_rate": 0.0003160233572720541,
"loss": 1.0185,
"step": 14
},
{
"epoch": 0.09,
"learning_rate": 0.0003145894656568153,
"loss": 1.1633,
"step": 15
},
{
"epoch": 0.09,
"learning_rate": 0.00031284282484699455,
"loss": 1.1775,
"step": 16
},
{
"epoch": 0.1,
"learning_rate": 0.00031078695234473526,
"loss": 1.0772,
"step": 17
},
{
"epoch": 0.1,
"learning_rate": 0.00030842598840365604,
"loss": 1.0825,
"step": 18
},
{
"epoch": 0.11,
"learning_rate": 0.00030576468769093104,
"loss": 1.1053,
"step": 19
},
{
"epoch": 0.11,
"learning_rate": 0.0003028084097120226,
"loss": 1.0627,
"step": 20
},
{
"epoch": 0.12,
"learning_rate": 0.0002995631080173512,
"loss": 1.0097,
"step": 21
},
{
"epoch": 0.13,
"learning_rate": 0.0002960353182126366,
"loss": 1.0523,
"step": 22
},
{
"epoch": 0.13,
"learning_rate": 0.00029223214479705777,
"loss": 1.0495,
"step": 23
},
{
"epoch": 0.14,
"learning_rate": 0.0002881612468557375,
"loss": 0.9913,
"step": 24
},
{
"epoch": 0.14,
"learning_rate": 0.00028383082263536385,
"loss": 0.9737,
"step": 25
},
{
"epoch": 0.15,
"learning_rate": 0.00027924959303401233,
"loss": 1.0118,
"step": 26
},
{
"epoch": 0.15,
"learning_rate": 0.000274426784038418,
"loss": 1.0696,
"step": 27
},
{
"epoch": 0.16,
"learning_rate": 0.00026937210814406584,
"loss": 1.0735,
"step": 28
},
{
"epoch": 0.17,
"learning_rate": 0.00026409574479551763,
"loss": 1.0232,
"step": 29
},
{
"epoch": 0.17,
"learning_rate": 0.00025860831988636497,
"loss": 1.0537,
"step": 30
},
{
"epoch": 0.18,
"learning_rate": 0.00025292088436009396,
"loss": 1.0636,
"step": 31
},
{
"epoch": 0.18,
"learning_rate": 0.0002470448919549556,
"loss": 1.0604,
"step": 32
},
{
"epoch": 0.19,
"learning_rate": 0.00024099217613766146,
"loss": 1.0282,
"step": 33
},
{
"epoch": 0.19,
"learning_rate": 0.00023477492627235606,
"loss": 1.0354,
"step": 34
},
{
"epoch": 0.2,
"learning_rate": 0.00022840566307286026,
"loss": 1.0877,
"step": 35
},
{
"epoch": 0.21,
"learning_rate": 0.00022189721338761954,
"loss": 0.9974,
"step": 36
},
{
"epoch": 0.21,
"learning_rate": 0.00021526268436813839,
"loss": 1.069,
"step": 37
},
{
"epoch": 0.22,
"learning_rate": 0.0002085154370729214,
"loss": 1.0533,
"step": 38
},
{
"epoch": 0.22,
"learning_rate": 0.00020166905956007982,
"loss": 1.0155,
"step": 39
},
{
"epoch": 0.23,
"learning_rate": 0.00019473733952279058,
"loss": 0.9494,
"step": 40
},
{
"epoch": 0.23,
"learning_rate": 0.00018773423652271724,
"loss": 0.953,
"step": 41
},
{
"epoch": 0.24,
"learning_rate": 0.00018067385387731056,
"loss": 1.0139,
"step": 42
},
{
"epoch": 0.25,
"learning_rate": 0.0001735704102576041,
"loss": 0.915,
"step": 43
},
{
"epoch": 0.25,
"learning_rate": 0.00016643821105370306,
"loss": 0.8843,
"step": 44
},
{
"epoch": 0.26,
"learning_rate": 0.0001592916195656322,
"loss": 0.9219,
"step": 45
},
{
"epoch": 0.26,
"learning_rate": 0.0001521450280775614,
"loss": 0.9749,
"step": 46
},
{
"epoch": 0.27,
"learning_rate": 0.00014501282887366027,
"loss": 0.9495,
"step": 47
},
{
"epoch": 0.27,
"learning_rate": 0.00013790938525395387,
"loss": 1.0825,
"step": 48
},
{
"epoch": 0.28,
"learning_rate": 0.00013084900260854716,
"loss": 0.8,
"step": 49
},
{
"epoch": 0.29,
"learning_rate": 0.0001238458996084738,
"loss": 1.036,
"step": 50
},
{
"epoch": 0.29,
"learning_rate": 0.00011691417957118454,
"loss": 0.9924,
"step": 51
},
{
"epoch": 0.3,
"learning_rate": 0.00011006780205834297,
"loss": 0.9575,
"step": 52
},
{
"epoch": 0.3,
"learning_rate": 0.000103320554763126,
"loss": 1.0124,
"step": 53
},
{
"epoch": 0.31,
"learning_rate": 9.668602574364485e-05,
"loss": 1.0523,
"step": 54
},
{
"epoch": 0.31,
"learning_rate": 9.017757605840412e-05,
"loss": 1.1276,
"step": 55
},
{
"epoch": 0.32,
"learning_rate": 8.380831285890832e-05,
"loss": 0.9645,
"step": 56
},
{
"epoch": 0.33,
"learning_rate": 7.759106299360295e-05,
"loss": 0.9707,
"step": 57
},
{
"epoch": 0.33,
"learning_rate": 7.153834717630876e-05,
"loss": 1.0702,
"step": 58
},
{
"epoch": 0.34,
"learning_rate": 6.566235477117044e-05,
"loss": 1.0682,
"step": 59
},
{
"epoch": 0.34,
"learning_rate": 5.9974919244899423e-05,
"loss": 0.8695,
"step": 60
},
{
"epoch": 0.35,
"learning_rate": 5.448749433574675e-05,
"loss": 0.953,
"step": 61
},
{
"epoch": 0.35,
"learning_rate": 4.921113098719853e-05,
"loss": 0.9892,
"step": 62
},
{
"epoch": 0.36,
"learning_rate": 4.4156455092846426e-05,
"loss": 0.9447,
"step": 63
},
{
"epoch": 0.37,
"learning_rate": 3.933364609725209e-05,
"loss": 0.9473,
"step": 64
},
{
"epoch": 0.37,
"learning_rate": 3.4752416495900555e-05,
"loss": 0.906,
"step": 65
},
{
"epoch": 0.38,
"learning_rate": 3.042199227552688e-05,
"loss": 0.9664,
"step": 66
},
{
"epoch": 0.38,
"learning_rate": 2.635109433420662e-05,
"loss": 1.0343,
"step": 67
},
{
"epoch": 0.39,
"learning_rate": 2.2547920918627827e-05,
"loss": 0.96,
"step": 68
},
{
"epoch": 0.39,
"learning_rate": 1.9020131113913137e-05,
"loss": 0.8727,
"step": 69
},
{
"epoch": 0.4,
"learning_rate": 1.5774829419241735e-05,
"loss": 1.071,
"step": 70
},
{
"epoch": 0.41,
"learning_rate": 1.281855144033336e-05,
"loss": 0.8835,
"step": 71
},
{
"epoch": 0.41,
"learning_rate": 1.0157250727608305e-05,
"loss": 0.9211,
"step": 72
},
{
"epoch": 0.42,
"learning_rate": 7.796286786529122e-06,
"loss": 0.9842,
"step": 73
},
{
"epoch": 0.42,
"learning_rate": 5.740414284269837e-06,
"loss": 1.0236,
"step": 74
},
{
"epoch": 0.43,
"learning_rate": 3.993773474449073e-06,
"loss": 0.9883,
"step": 75
},
{
"epoch": 0.43,
"learning_rate": 2.5598818592102812e-06,
"loss": 0.9573,
"step": 76
},
{
"epoch": 0.44,
"learning_rate": 1.4416271054404854e-06,
"loss": 0.9061,
"step": 77
},
{
"epoch": 0.45,
"learning_rate": 6.412612293934594e-07,
"loss": 0.9517,
"step": 78
},
{
"epoch": 0.45,
"learning_rate": 1.6039606142882264e-07,
"loss": 0.9217,
"step": 79
},
{
"epoch": 0.46,
"learning_rate": 0.0,
"loss": 0.9262,
"step": 80
}
],
"logging_steps": 1,
"max_steps": 80,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 1.1629371054302822e+17,
"trial_name": null,
"trial_params": null
}