Training in progress, step 239, checkpoint — commit 3d63183 (37.2 kB)
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04019846943066185,
"eval_steps": 500,
"global_step": 239,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00016819443276427552,
"grad_norm": NaN,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.2492,
"step": 1
},
{
"epoch": 0.00033638886552855103,
"grad_norm": NaN,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0,
"step": 2
},
{
"epoch": 0.0005045832982928265,
"grad_norm": NaN,
"learning_rate": 3e-06,
"loss": 0.0,
"step": 3
},
{
"epoch": 0.0006727777310571021,
"grad_norm": NaN,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0,
"step": 4
},
{
"epoch": 0.0008409721638213775,
"grad_norm": NaN,
"learning_rate": 5e-06,
"loss": 0.0,
"step": 5
},
{
"epoch": 0.001009166596585653,
"grad_norm": NaN,
"learning_rate": 6e-06,
"loss": 0.0,
"step": 6
},
{
"epoch": 0.0011773610293499286,
"grad_norm": NaN,
"learning_rate": 7.000000000000001e-06,
"loss": 0.0,
"step": 7
},
{
"epoch": 0.0013455554621142041,
"grad_norm": NaN,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0,
"step": 8
},
{
"epoch": 0.0015137498948784795,
"grad_norm": NaN,
"learning_rate": 9e-06,
"loss": 0.0,
"step": 9
},
{
"epoch": 0.001681944327642755,
"grad_norm": NaN,
"learning_rate": 1e-05,
"loss": 0.0,
"step": 10
},
{
"epoch": 0.0018501387604070306,
"grad_norm": NaN,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0,
"step": 11
},
{
"epoch": 0.002018333193171306,
"grad_norm": NaN,
"learning_rate": 1.2e-05,
"loss": 0.0,
"step": 12
},
{
"epoch": 0.0021865276259355813,
"grad_norm": NaN,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0,
"step": 13
},
{
"epoch": 0.002354722058699857,
"grad_norm": NaN,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0,
"step": 14
},
{
"epoch": 0.0025229164914641325,
"grad_norm": NaN,
"learning_rate": 1.5e-05,
"loss": 0.0,
"step": 15
},
{
"epoch": 0.0026911109242284082,
"grad_norm": NaN,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0,
"step": 16
},
{
"epoch": 0.0028593053569926836,
"grad_norm": NaN,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.0,
"step": 17
},
{
"epoch": 0.003027499789756959,
"grad_norm": NaN,
"learning_rate": 1.8e-05,
"loss": 0.0,
"step": 18
},
{
"epoch": 0.0031956942225212347,
"grad_norm": NaN,
"learning_rate": 1.9e-05,
"loss": 0.0,
"step": 19
},
{
"epoch": 0.00336388865528551,
"grad_norm": NaN,
"learning_rate": 2e-05,
"loss": 0.0,
"step": 20
},
{
"epoch": 0.0035320830880497854,
"grad_norm": NaN,
"learning_rate": 2.1e-05,
"loss": 0.0,
"step": 21
},
{
"epoch": 0.0037002775208140612,
"grad_norm": NaN,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0,
"step": 22
},
{
"epoch": 0.0038684719535783366,
"grad_norm": NaN,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0,
"step": 23
},
{
"epoch": 0.004036666386342612,
"grad_norm": NaN,
"learning_rate": 2.4e-05,
"loss": 0.0,
"step": 24
},
{
"epoch": 0.004204860819106888,
"grad_norm": NaN,
"learning_rate": 2.5e-05,
"loss": 0.0,
"step": 25
},
{
"epoch": 0.004373055251871163,
"grad_norm": NaN,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0,
"step": 26
},
{
"epoch": 0.004541249684635438,
"grad_norm": NaN,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0,
"step": 27
},
{
"epoch": 0.004709444117399714,
"grad_norm": NaN,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0,
"step": 28
},
{
"epoch": 0.00487763855016399,
"grad_norm": NaN,
"learning_rate": 2.9e-05,
"loss": 0.0,
"step": 29
},
{
"epoch": 0.005045832982928265,
"grad_norm": NaN,
"learning_rate": 3e-05,
"loss": 0.0,
"step": 30
},
{
"epoch": 0.005214027415692541,
"grad_norm": NaN,
"learning_rate": 3.1e-05,
"loss": 0.0,
"step": 31
},
{
"epoch": 0.0053822218484568165,
"grad_norm": NaN,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0,
"step": 32
},
{
"epoch": 0.005550416281221091,
"grad_norm": NaN,
"learning_rate": 3.3e-05,
"loss": 0.0,
"step": 33
},
{
"epoch": 0.005718610713985367,
"grad_norm": NaN,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.0,
"step": 34
},
{
"epoch": 0.005886805146749643,
"grad_norm": NaN,
"learning_rate": 3.5e-05,
"loss": 0.0,
"step": 35
},
{
"epoch": 0.006054999579513918,
"grad_norm": NaN,
"learning_rate": 3.6e-05,
"loss": 0.0,
"step": 36
},
{
"epoch": 0.006223194012278194,
"grad_norm": NaN,
"learning_rate": 3.7e-05,
"loss": 0.0,
"step": 37
},
{
"epoch": 0.0063913884450424695,
"grad_norm": NaN,
"learning_rate": 3.8e-05,
"loss": 0.0,
"step": 38
},
{
"epoch": 0.006559582877806744,
"grad_norm": NaN,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0,
"step": 39
},
{
"epoch": 0.00672777731057102,
"grad_norm": NaN,
"learning_rate": 4e-05,
"loss": 0.0,
"step": 40
},
{
"epoch": 0.006895971743335296,
"grad_norm": NaN,
"learning_rate": 4.1e-05,
"loss": 0.0,
"step": 41
},
{
"epoch": 0.007064166176099571,
"grad_norm": NaN,
"learning_rate": 4.2e-05,
"loss": 0.0,
"step": 42
},
{
"epoch": 0.007232360608863847,
"grad_norm": NaN,
"learning_rate": 4.3e-05,
"loss": 0.0,
"step": 43
},
{
"epoch": 0.0074005550416281225,
"grad_norm": NaN,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.0,
"step": 44
},
{
"epoch": 0.007568749474392397,
"grad_norm": NaN,
"learning_rate": 4.5e-05,
"loss": 0.0,
"step": 45
},
{
"epoch": 0.007736943907156673,
"grad_norm": NaN,
"learning_rate": 4.600000000000001e-05,
"loss": 0.0,
"step": 46
},
{
"epoch": 0.007905138339920948,
"grad_norm": NaN,
"learning_rate": 4.7e-05,
"loss": 0.0,
"step": 47
},
{
"epoch": 0.008073332772685224,
"grad_norm": NaN,
"learning_rate": 4.8e-05,
"loss": 0.0,
"step": 48
},
{
"epoch": 0.0082415272054495,
"grad_norm": NaN,
"learning_rate": 4.9e-05,
"loss": 0.0,
"step": 49
},
{
"epoch": 0.008409721638213775,
"grad_norm": NaN,
"learning_rate": 5e-05,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.008577916070978051,
"grad_norm": NaN,
"learning_rate": 5.1000000000000006e-05,
"loss": 0.0,
"step": 51
},
{
"epoch": 0.008746110503742325,
"grad_norm": NaN,
"learning_rate": 5.2000000000000004e-05,
"loss": 0.0,
"step": 52
},
{
"epoch": 0.008914304936506601,
"grad_norm": NaN,
"learning_rate": 5.300000000000001e-05,
"loss": 0.0,
"step": 53
},
{
"epoch": 0.009082499369270877,
"grad_norm": NaN,
"learning_rate": 5.4000000000000005e-05,
"loss": 0.0,
"step": 54
},
{
"epoch": 0.009250693802035153,
"grad_norm": NaN,
"learning_rate": 5.500000000000001e-05,
"loss": 0.0,
"step": 55
},
{
"epoch": 0.009418888234799428,
"grad_norm": NaN,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.0,
"step": 56
},
{
"epoch": 0.009587082667563704,
"grad_norm": NaN,
"learning_rate": 5.6999999999999996e-05,
"loss": 0.0,
"step": 57
},
{
"epoch": 0.00975527710032798,
"grad_norm": NaN,
"learning_rate": 5.8e-05,
"loss": 0.0,
"step": 58
},
{
"epoch": 0.009923471533092254,
"grad_norm": NaN,
"learning_rate": 5.9e-05,
"loss": 0.0,
"step": 59
},
{
"epoch": 0.01009166596585653,
"grad_norm": NaN,
"learning_rate": 6e-05,
"loss": 0.0,
"step": 60
},
{
"epoch": 0.010259860398620806,
"grad_norm": NaN,
"learning_rate": 6.1e-05,
"loss": 0.0,
"step": 61
},
{
"epoch": 0.010428054831385081,
"grad_norm": NaN,
"learning_rate": 6.2e-05,
"loss": 0.0,
"step": 62
},
{
"epoch": 0.010596249264149357,
"grad_norm": NaN,
"learning_rate": 6.3e-05,
"loss": 0.0,
"step": 63
},
{
"epoch": 0.010764443696913633,
"grad_norm": NaN,
"learning_rate": 6.400000000000001e-05,
"loss": 0.0,
"step": 64
},
{
"epoch": 0.010932638129677907,
"grad_norm": NaN,
"learning_rate": 6.500000000000001e-05,
"loss": 0.0,
"step": 65
},
{
"epoch": 0.011100832562442183,
"grad_norm": NaN,
"learning_rate": 6.6e-05,
"loss": 0.0,
"step": 66
},
{
"epoch": 0.011269026995206459,
"grad_norm": NaN,
"learning_rate": 6.7e-05,
"loss": 0.0,
"step": 67
},
{
"epoch": 0.011437221427970734,
"grad_norm": NaN,
"learning_rate": 6.800000000000001e-05,
"loss": 0.0,
"step": 68
},
{
"epoch": 0.01160541586073501,
"grad_norm": NaN,
"learning_rate": 6.9e-05,
"loss": 0.0,
"step": 69
},
{
"epoch": 0.011773610293499286,
"grad_norm": NaN,
"learning_rate": 7e-05,
"loss": 0.0,
"step": 70
},
{
"epoch": 0.01194180472626356,
"grad_norm": NaN,
"learning_rate": 7.1e-05,
"loss": 0.0,
"step": 71
},
{
"epoch": 0.012109999159027836,
"grad_norm": NaN,
"learning_rate": 7.2e-05,
"loss": 0.0,
"step": 72
},
{
"epoch": 0.012278193591792112,
"grad_norm": NaN,
"learning_rate": 7.3e-05,
"loss": 0.0,
"step": 73
},
{
"epoch": 0.012446388024556387,
"grad_norm": NaN,
"learning_rate": 7.4e-05,
"loss": 0.0,
"step": 74
},
{
"epoch": 0.012614582457320663,
"grad_norm": NaN,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0,
"step": 75
},
{
"epoch": 0.012782776890084939,
"grad_norm": NaN,
"learning_rate": 7.6e-05,
"loss": 0.0,
"step": 76
},
{
"epoch": 0.012950971322849213,
"grad_norm": NaN,
"learning_rate": 7.7e-05,
"loss": 0.0,
"step": 77
},
{
"epoch": 0.013119165755613489,
"grad_norm": NaN,
"learning_rate": 7.800000000000001e-05,
"loss": 0.0,
"step": 78
},
{
"epoch": 0.013287360188377765,
"grad_norm": NaN,
"learning_rate": 7.900000000000001e-05,
"loss": 0.0,
"step": 79
},
{
"epoch": 0.01345555462114204,
"grad_norm": NaN,
"learning_rate": 8e-05,
"loss": 0.0,
"step": 80
},
{
"epoch": 0.013623749053906316,
"grad_norm": NaN,
"learning_rate": 8.1e-05,
"loss": 0.0,
"step": 81
},
{
"epoch": 0.013791943486670592,
"grad_norm": NaN,
"learning_rate": 8.2e-05,
"loss": 0.0,
"step": 82
},
{
"epoch": 0.013960137919434866,
"grad_norm": NaN,
"learning_rate": 8.3e-05,
"loss": 0.0,
"step": 83
},
{
"epoch": 0.014128332352199142,
"grad_norm": NaN,
"learning_rate": 8.4e-05,
"loss": 0.0,
"step": 84
},
{
"epoch": 0.014296526784963418,
"grad_norm": NaN,
"learning_rate": 8.5e-05,
"loss": 0.0,
"step": 85
},
{
"epoch": 0.014464721217727693,
"grad_norm": NaN,
"learning_rate": 8.6e-05,
"loss": 0.0,
"step": 86
},
{
"epoch": 0.014632915650491969,
"grad_norm": NaN,
"learning_rate": 8.7e-05,
"loss": 0.0,
"step": 87
},
{
"epoch": 0.014801110083256245,
"grad_norm": NaN,
"learning_rate": 8.800000000000001e-05,
"loss": 0.0,
"step": 88
},
{
"epoch": 0.014969304516020519,
"grad_norm": NaN,
"learning_rate": 8.900000000000001e-05,
"loss": 0.0,
"step": 89
},
{
"epoch": 0.015137498948784795,
"grad_norm": NaN,
"learning_rate": 9e-05,
"loss": 0.0,
"step": 90
},
{
"epoch": 0.01530569338154907,
"grad_norm": NaN,
"learning_rate": 9.1e-05,
"loss": 0.0,
"step": 91
},
{
"epoch": 0.015473887814313346,
"grad_norm": NaN,
"learning_rate": 9.200000000000001e-05,
"loss": 0.0,
"step": 92
},
{
"epoch": 0.01564208224707762,
"grad_norm": NaN,
"learning_rate": 9.300000000000001e-05,
"loss": 0.0,
"step": 93
},
{
"epoch": 0.015810276679841896,
"grad_norm": NaN,
"learning_rate": 9.4e-05,
"loss": 0.0,
"step": 94
},
{
"epoch": 0.015978471112606172,
"grad_norm": NaN,
"learning_rate": 9.5e-05,
"loss": 0.0,
"step": 95
},
{
"epoch": 0.016146665545370448,
"grad_norm": NaN,
"learning_rate": 9.6e-05,
"loss": 0.0,
"step": 96
},
{
"epoch": 0.016314859978134724,
"grad_norm": NaN,
"learning_rate": 9.7e-05,
"loss": 0.0,
"step": 97
},
{
"epoch": 0.016483054410899,
"grad_norm": NaN,
"learning_rate": 9.8e-05,
"loss": 0.0,
"step": 98
},
{
"epoch": 0.016651248843663275,
"grad_norm": NaN,
"learning_rate": 9.900000000000001e-05,
"loss": 0.0,
"step": 99
},
{
"epoch": 0.01681944327642755,
"grad_norm": NaN,
"learning_rate": 0.0001,
"loss": 0.0,
"step": 100
},
{
"epoch": 0.016987637709191827,
"grad_norm": NaN,
"learning_rate": 9.999999277778003e-05,
"loss": 0.0,
"step": 101
},
{
"epoch": 0.017155832141956102,
"grad_norm": NaN,
"learning_rate": 9.999997111112216e-05,
"loss": 0.0,
"step": 102
},
{
"epoch": 0.017324026574720378,
"grad_norm": NaN,
"learning_rate": 9.999993500003267e-05,
"loss": 0.0,
"step": 103
},
{
"epoch": 0.01749222100748465,
"grad_norm": NaN,
"learning_rate": 9.999988444452199e-05,
"loss": 0.0,
"step": 104
},
{
"epoch": 0.017660415440248926,
"grad_norm": NaN,
"learning_rate": 9.999981944460473e-05,
"loss": 0.0,
"step": 105
},
{
"epoch": 0.017828609873013202,
"grad_norm": NaN,
"learning_rate": 9.999974000029966e-05,
"loss": 0.0,
"step": 106
},
{
"epoch": 0.017996804305777478,
"grad_norm": NaN,
"learning_rate": 9.999964611162974e-05,
"loss": 0.0,
"step": 107
},
{
"epoch": 0.018164998738541754,
"grad_norm": NaN,
"learning_rate": 9.999953777862207e-05,
"loss": 0.0,
"step": 108
},
{
"epoch": 0.01833319317130603,
"grad_norm": NaN,
"learning_rate": 9.999941500130797e-05,
"loss": 0.0,
"step": 109
},
{
"epoch": 0.018501387604070305,
"grad_norm": NaN,
"learning_rate": 9.99992777797229e-05,
"loss": 0.0,
"step": 110
},
{
"epoch": 0.01866958203683458,
"grad_norm": NaN,
"learning_rate": 9.999912611390651e-05,
"loss": 0.0,
"step": 111
},
{
"epoch": 0.018837776469598857,
"grad_norm": NaN,
"learning_rate": 9.999896000390261e-05,
"loss": 0.0,
"step": 112
},
{
"epoch": 0.019005970902363133,
"grad_norm": NaN,
"learning_rate": 9.999877944975917e-05,
"loss": 0.0,
"step": 113
},
{
"epoch": 0.01917416533512741,
"grad_norm": NaN,
"learning_rate": 9.999858445152839e-05,
"loss": 0.0,
"step": 114
},
{
"epoch": 0.019342359767891684,
"grad_norm": NaN,
"learning_rate": 9.999837500926656e-05,
"loss": 0.0,
"step": 115
},
{
"epoch": 0.01951055420065596,
"grad_norm": NaN,
"learning_rate": 9.99981511230342e-05,
"loss": 0.0,
"step": 116
},
{
"epoch": 0.019678748633420232,
"grad_norm": NaN,
"learning_rate": 9.999791279289601e-05,
"loss": 0.0,
"step": 117
},
{
"epoch": 0.019846943066184508,
"grad_norm": NaN,
"learning_rate": 9.999766001892081e-05,
"loss": 0.0,
"step": 118
},
{
"epoch": 0.020015137498948784,
"grad_norm": NaN,
"learning_rate": 9.999739280118163e-05,
"loss": 0.0,
"step": 119
},
{
"epoch": 0.02018333193171306,
"grad_norm": NaN,
"learning_rate": 9.999711113975568e-05,
"loss": 0.0,
"step": 120
},
{
"epoch": 0.020351526364477335,
"grad_norm": NaN,
"learning_rate": 9.999681503472433e-05,
"loss": 0.0,
"step": 121
},
{
"epoch": 0.02051972079724161,
"grad_norm": NaN,
"learning_rate": 9.99965044861731e-05,
"loss": 0.0,
"step": 122
},
{
"epoch": 0.020687915230005887,
"grad_norm": NaN,
"learning_rate": 9.999617949419174e-05,
"loss": 0.0,
"step": 123
},
{
"epoch": 0.020856109662770163,
"grad_norm": NaN,
"learning_rate": 9.999584005887407e-05,
"loss": 0.0,
"step": 124
},
{
"epoch": 0.02102430409553444,
"grad_norm": NaN,
"learning_rate": 9.999548618031823e-05,
"loss": 0.0,
"step": 125
},
{
"epoch": 0.021192498528298714,
"grad_norm": NaN,
"learning_rate": 9.99951178586264e-05,
"loss": 0.0,
"step": 126
},
{
"epoch": 0.02136069296106299,
"grad_norm": NaN,
"learning_rate": 9.9994735093905e-05,
"loss": 0.0,
"step": 127
},
{
"epoch": 0.021528887393827266,
"grad_norm": NaN,
"learning_rate": 9.999433788626461e-05,
"loss": 0.0,
"step": 128
},
{
"epoch": 0.02169708182659154,
"grad_norm": NaN,
"learning_rate": 9.999392623581997e-05,
"loss": 0.0,
"step": 129
},
{
"epoch": 0.021865276259355814,
"grad_norm": NaN,
"learning_rate": 9.999350014269e-05,
"loss": 0.0,
"step": 130
},
{
"epoch": 0.02203347069212009,
"grad_norm": NaN,
"learning_rate": 9.999305960699781e-05,
"loss": 0.0,
"step": 131
},
{
"epoch": 0.022201665124884366,
"grad_norm": NaN,
"learning_rate": 9.999260462887064e-05,
"loss": 0.0,
"step": 132
},
{
"epoch": 0.02236985955764864,
"grad_norm": NaN,
"learning_rate": 9.999213520843994e-05,
"loss": 0.0,
"step": 133
},
{
"epoch": 0.022538053990412917,
"grad_norm": NaN,
"learning_rate": 9.999165134584133e-05,
"loss": 0.0,
"step": 134
},
{
"epoch": 0.022706248423177193,
"grad_norm": NaN,
"learning_rate": 9.999115304121457e-05,
"loss": 0.0,
"step": 135
},
{
"epoch": 0.02287444285594147,
"grad_norm": NaN,
"learning_rate": 9.999064029470366e-05,
"loss": 0.0,
"step": 136
},
{
"epoch": 0.023042637288705745,
"grad_norm": NaN,
"learning_rate": 9.999011310645668e-05,
"loss": 0.0,
"step": 137
},
{
"epoch": 0.02321083172147002,
"grad_norm": NaN,
"learning_rate": 9.998957147662594e-05,
"loss": 0.0,
"step": 138
},
{
"epoch": 0.023379026154234296,
"grad_norm": NaN,
"learning_rate": 9.998901540536792e-05,
"loss": 0.0,
"step": 139
},
{
"epoch": 0.023547220586998572,
"grad_norm": NaN,
"learning_rate": 9.998844489284327e-05,
"loss": 0.0,
"step": 140
},
{
"epoch": 0.023715415019762844,
"grad_norm": NaN,
"learning_rate": 9.998785993921678e-05,
"loss": 0.0,
"step": 141
},
{
"epoch": 0.02388360945252712,
"grad_norm": NaN,
"learning_rate": 9.998726054465744e-05,
"loss": 0.0,
"step": 142
},
{
"epoch": 0.024051803885291396,
"grad_norm": NaN,
"learning_rate": 9.998664670933844e-05,
"loss": 0.0,
"step": 143
},
{
"epoch": 0.02421999831805567,
"grad_norm": NaN,
"learning_rate": 9.998601843343707e-05,
"loss": 0.0,
"step": 144
},
{
"epoch": 0.024388192750819947,
"grad_norm": NaN,
"learning_rate": 9.998537571713487e-05,
"loss": 0.0,
"step": 145
},
{
"epoch": 0.024556387183584223,
"grad_norm": NaN,
"learning_rate": 9.998471856061747e-05,
"loss": 0.0,
"step": 146
},
{
"epoch": 0.0247245816163485,
"grad_norm": NaN,
"learning_rate": 9.998404696407476e-05,
"loss": 0.0,
"step": 147
},
{
"epoch": 0.024892776049112775,
"grad_norm": NaN,
"learning_rate": 9.998336092770073e-05,
"loss": 0.0,
"step": 148
},
{
"epoch": 0.02506097048187705,
"grad_norm": NaN,
"learning_rate": 9.998266045169356e-05,
"loss": 0.0,
"step": 149
},
{
"epoch": 0.025229164914641326,
"grad_norm": NaN,
"learning_rate": 9.998194553625563e-05,
"loss": 0.0,
"step": 150
},
{
"epoch": 0.025397359347405602,
"grad_norm": NaN,
"learning_rate": 9.998121618159346e-05,
"loss": 0.0,
"step": 151
},
{
"epoch": 0.025565553780169878,
"grad_norm": NaN,
"learning_rate": 9.998047238791777e-05,
"loss": 0.0,
"step": 152
},
{
"epoch": 0.02573374821293415,
"grad_norm": NaN,
"learning_rate": 9.997971415544341e-05,
"loss": 0.0,
"step": 153
},
{
"epoch": 0.025901942645698426,
"grad_norm": NaN,
"learning_rate": 9.997894148438944e-05,
"loss": 0.0,
"step": 154
},
{
"epoch": 0.026070137078462702,
"grad_norm": NaN,
"learning_rate": 9.997815437497908e-05,
"loss": 0.0,
"step": 155
},
{
"epoch": 0.026238331511226978,
"grad_norm": NaN,
"learning_rate": 9.997735282743969e-05,
"loss": 0.0,
"step": 156
},
{
"epoch": 0.026406525943991253,
"grad_norm": NaN,
"learning_rate": 9.997653684200286e-05,
"loss": 0.0,
"step": 157
},
{
"epoch": 0.02657472037675553,
"grad_norm": NaN,
"learning_rate": 9.99757064189043e-05,
"loss": 0.0,
"step": 158
},
{
"epoch": 0.026742914809519805,
"grad_norm": NaN,
"learning_rate": 9.997486155838392e-05,
"loss": 0.0,
"step": 159
},
{
"epoch": 0.02691110924228408,
"grad_norm": NaN,
"learning_rate": 9.997400226068578e-05,
"loss": 0.0,
"step": 160
},
{
"epoch": 0.027079303675048357,
"grad_norm": NaN,
"learning_rate": 9.997312852605814e-05,
"loss": 0.0,
"step": 161
},
{
"epoch": 0.027247498107812632,
"grad_norm": NaN,
"learning_rate": 9.997224035475339e-05,
"loss": 0.0,
"step": 162
},
{
"epoch": 0.027415692540576908,
"grad_norm": NaN,
"learning_rate": 9.997133774702812e-05,
"loss": 0.0,
"step": 163
},
{
"epoch": 0.027583886973341184,
"grad_norm": NaN,
"learning_rate": 9.997042070314309e-05,
"loss": 0.0,
"step": 164
},
{
"epoch": 0.027752081406105456,
"grad_norm": NaN,
"learning_rate": 9.996948922336323e-05,
"loss": 0.0,
"step": 165
},
{
"epoch": 0.027920275838869732,
"grad_norm": NaN,
"learning_rate": 9.996854330795761e-05,
"loss": 0.0,
"step": 166
},
{
"epoch": 0.028088470271634008,
"grad_norm": NaN,
"learning_rate": 9.996758295719951e-05,
"loss": 0.0,
"step": 167
},
{
"epoch": 0.028256664704398284,
"grad_norm": NaN,
"learning_rate": 9.996660817136636e-05,
"loss": 0.0,
"step": 168
},
{
"epoch": 0.02842485913716256,
"grad_norm": NaN,
"learning_rate": 9.996561895073976e-05,
"loss": 0.0,
"step": 169
},
{
"epoch": 0.028593053569926835,
"grad_norm": NaN,
"learning_rate": 9.996461529560553e-05,
"loss": 0.0,
"step": 170
},
{
"epoch": 0.02876124800269111,
"grad_norm": NaN,
"learning_rate": 9.996359720625354e-05,
"loss": 0.0,
"step": 171
},
{
"epoch": 0.028929442435455387,
"grad_norm": NaN,
"learning_rate": 9.996256468297795e-05,
"loss": 0.0,
"step": 172
},
{
"epoch": 0.029097636868219662,
"grad_norm": NaN,
"learning_rate": 9.996151772607704e-05,
"loss": 0.0,
"step": 173
},
{
"epoch": 0.029265831300983938,
"grad_norm": NaN,
"learning_rate": 9.996045633585326e-05,
"loss": 0.0,
"step": 174
},
{
"epoch": 0.029434025733748214,
"grad_norm": NaN,
"learning_rate": 9.995938051261324e-05,
"loss": 0.0,
"step": 175
},
{
"epoch": 0.02960222016651249,
"grad_norm": NaN,
"learning_rate": 9.995829025666775e-05,
"loss": 0.0,
"step": 176
},
{
"epoch": 0.029770414599276766,
"grad_norm": NaN,
"learning_rate": 9.995718556833178e-05,
"loss": 0.0,
"step": 177
},
{
"epoch": 0.029938609032041038,
"grad_norm": NaN,
"learning_rate": 9.995606644792446e-05,
"loss": 0.0,
"step": 178
},
{
"epoch": 0.030106803464805314,
"grad_norm": NaN,
"learning_rate": 9.995493289576907e-05,
"loss": 0.0,
"step": 179
},
{
"epoch": 0.03027499789756959,
"grad_norm": NaN,
"learning_rate": 9.99537849121931e-05,
"loss": 0.0,
"step": 180
},
{
"epoch": 0.030443192330333865,
"grad_norm": NaN,
"learning_rate": 9.995262249752817e-05,
"loss": 0.0,
"step": 181
},
{
"epoch": 0.03061138676309814,
"grad_norm": NaN,
"learning_rate": 9.995144565211012e-05,
"loss": 0.0,
"step": 182
},
{
"epoch": 0.030779581195862417,
"grad_norm": NaN,
"learning_rate": 9.99502543762789e-05,
"loss": 0.0,
"step": 183
},
{
"epoch": 0.030947775628626693,
"grad_norm": NaN,
"learning_rate": 9.994904867037867e-05,
"loss": 0.0,
"step": 184
},
{
"epoch": 0.03111597006139097,
"grad_norm": NaN,
"learning_rate": 9.994782853475774e-05,
"loss": 0.0,
"step": 185
},
{
"epoch": 0.03128416449415524,
"grad_norm": NaN,
"learning_rate": 9.994659396976859e-05,
"loss": 0.0,
"step": 186
},
{
"epoch": 0.03145235892691952,
"grad_norm": NaN,
"learning_rate": 9.994534497576787e-05,
"loss": 0.0,
"step": 187
},
{
"epoch": 0.03162055335968379,
"grad_norm": NaN,
"learning_rate": 9.994408155311642e-05,
"loss": 0.0,
"step": 188
},
{
"epoch": 0.03178874779244807,
"grad_norm": NaN,
"learning_rate": 9.994280370217922e-05,
"loss": 0.0,
"step": 189
},
{
"epoch": 0.031956942225212344,
"grad_norm": NaN,
"learning_rate": 9.99415114233254e-05,
"loss": 0.0,
"step": 190
},
{
"epoch": 0.03212513665797662,
"grad_norm": NaN,
"learning_rate": 9.994020471692833e-05,
"loss": 0.0,
"step": 191
},
{
"epoch": 0.032293331090740895,
"grad_norm": NaN,
"learning_rate": 9.993888358336545e-05,
"loss": 0.0,
"step": 192
},
{
"epoch": 0.032461525523505175,
"grad_norm": NaN,
"learning_rate": 9.993754802301847e-05,
"loss": 0.0,
"step": 193
},
{
"epoch": 0.03262971995626945,
"grad_norm": NaN,
"learning_rate": 9.993619803627321e-05,
"loss": 0.0,
"step": 194
},
{
"epoch": 0.032797914389033726,
"grad_norm": NaN,
"learning_rate": 9.993483362351963e-05,
"loss": 0.0,
"step": 195
},
{
"epoch": 0.032966108821798,
"grad_norm": NaN,
"learning_rate": 9.993345478515194e-05,
"loss": 0.0,
"step": 196
},
{
"epoch": 0.03313430325456227,
"grad_norm": NaN,
"learning_rate": 9.993206152156844e-05,
"loss": 0.0,
"step": 197
},
{
"epoch": 0.03330249768732655,
"grad_norm": NaN,
"learning_rate": 9.993065383317163e-05,
"loss": 0.0,
"step": 198
},
{
"epoch": 0.03347069212009082,
"grad_norm": NaN,
"learning_rate": 9.992923172036819e-05,
"loss": 0.0,
"step": 199
},
{
"epoch": 0.0336388865528551,
"grad_norm": NaN,
"learning_rate": 9.992779518356896e-05,
"loss": 0.0,
"step": 200
},
{
"epoch": 0.033807080985619374,
"grad_norm": NaN,
"learning_rate": 9.99263442231889e-05,
"loss": 0.0,
"step": 201
},
{
"epoch": 0.03397527541838365,
"grad_norm": NaN,
"learning_rate": 9.99248788396472e-05,
"loss": 0.0,
"step": 202
},
{
"epoch": 0.034143469851147926,
"grad_norm": NaN,
"learning_rate": 9.992339903336722e-05,
"loss": 0.0,
"step": 203
},
{
"epoch": 0.034311664283912205,
"grad_norm": NaN,
"learning_rate": 9.992190480477641e-05,
"loss": 0.0,
"step": 204
},
{
"epoch": 0.03447985871667648,
"grad_norm": NaN,
"learning_rate": 9.992039615430648e-05,
"loss": 0.0,
"step": 205
},
{
"epoch": 0.034648053149440756,
"grad_norm": NaN,
"learning_rate": 9.991887308239322e-05,
"loss": 0.0,
"step": 206
},
{
"epoch": 0.03481624758220503,
"grad_norm": NaN,
"learning_rate": 9.991733558947667e-05,
"loss": 0.0,
"step": 207
},
{
"epoch": 0.0349844420149693,
"grad_norm": NaN,
"learning_rate": 9.991578367600096e-05,
"loss": 0.0,
"step": 208
},
{
"epoch": 0.03515263644773358,
"grad_norm": NaN,
"learning_rate": 9.991421734241444e-05,
"loss": 0.0,
"step": 209
},
{
"epoch": 0.03532083088049785,
"grad_norm": NaN,
"learning_rate": 9.99126365891696e-05,
"loss": 0.0,
"step": 210
},
{
"epoch": 0.03548902531326213,
"grad_norm": NaN,
"learning_rate": 9.991104141672309e-05,
"loss": 0.0,
"step": 211
},
{
"epoch": 0.035657219746026404,
"grad_norm": NaN,
"learning_rate": 9.990943182553579e-05,
"loss": 0.0,
"step": 212
},
{
"epoch": 0.035825414178790684,
"grad_norm": NaN,
"learning_rate": 9.990780781607261e-05,
"loss": 0.0,
"step": 213
},
{
"epoch": 0.035993608611554956,
"grad_norm": NaN,
"learning_rate": 9.990616938880278e-05,
"loss": 0.0,
"step": 214
},
{
"epoch": 0.036161803044319235,
"grad_norm": NaN,
"learning_rate": 9.990451654419957e-05,
"loss": 0.0,
"step": 215
},
{
"epoch": 0.03632999747708351,
"grad_norm": NaN,
"learning_rate": 9.990284928274051e-05,
"loss": 0.0,
"step": 216
},
{
"epoch": 0.03649819190984779,
"grad_norm": NaN,
"learning_rate": 9.990116760490723e-05,
"loss": 0.0,
"step": 217
},
{
"epoch": 0.03666638634261206,
"grad_norm": NaN,
"learning_rate": 9.989947151118555e-05,
"loss": 0.0,
"step": 218
},
{
"epoch": 0.03683458077537634,
"grad_norm": NaN,
"learning_rate": 9.989776100206548e-05,
"loss": 0.0,
"step": 219
},
{
"epoch": 0.03700277520814061,
"grad_norm": NaN,
"learning_rate": 9.989603607804112e-05,
"loss": 0.0,
"step": 220
},
{
"epoch": 0.03717096964090488,
"grad_norm": NaN,
"learning_rate": 9.98942967396108e-05,
"loss": 0.0,
"step": 221
},
{
"epoch": 0.03733916407366916,
"grad_norm": NaN,
"learning_rate": 9.989254298727702e-05,
"loss": 0.0,
"step": 222
},
{
"epoch": 0.037507358506433434,
"grad_norm": NaN,
"learning_rate": 9.989077482154638e-05,
"loss": 0.0,
"step": 223
},
{
"epoch": 0.037675552939197714,
"grad_norm": NaN,
"learning_rate": 9.988899224292971e-05,
"loss": 0.0,
"step": 224
},
{
"epoch": 0.037843747371961986,
"grad_norm": NaN,
"learning_rate": 9.988719525194198e-05,
"loss": 0.0,
"step": 225
},
{
"epoch": 0.038011941804726265,
"grad_norm": NaN,
"learning_rate": 9.988538384910231e-05,
"loss": 0.0,
"step": 226
},
{
"epoch": 0.03818013623749054,
"grad_norm": NaN,
"learning_rate": 9.988355803493398e-05,
"loss": 0.0,
"step": 227
},
{
"epoch": 0.03834833067025482,
"grad_norm": NaN,
"learning_rate": 9.988171780996446e-05,
"loss": 0.0,
"step": 228
},
{
"epoch": 0.03851652510301909,
"grad_norm": NaN,
"learning_rate": 9.987986317472539e-05,
"loss": 0.0,
"step": 229
},
{
"epoch": 0.03868471953578337,
"grad_norm": NaN,
"learning_rate": 9.987799412975252e-05,
"loss": 0.0,
"step": 230
},
{
"epoch": 0.03885291396854764,
"grad_norm": NaN,
"learning_rate": 9.987611067558582e-05,
"loss": 0.0,
"step": 231
},
{
"epoch": 0.03902110840131192,
"grad_norm": NaN,
"learning_rate": 9.987421281276939e-05,
"loss": 0.0,
"step": 232
},
{
"epoch": 0.03918930283407619,
"grad_norm": NaN,
"learning_rate": 9.98723005418515e-05,
"loss": 0.0,
"step": 233
},
{
"epoch": 0.039357497266840465,
"grad_norm": NaN,
"learning_rate": 9.987037386338458e-05,
"loss": 0.0,
"step": 234
},
{
"epoch": 0.039525691699604744,
"grad_norm": NaN,
"learning_rate": 9.986843277792523e-05,
"loss": 0.0,
"step": 235
},
{
"epoch": 0.039693886132369016,
"grad_norm": NaN,
"learning_rate": 9.986647728603422e-05,
"loss": 0.0,
"step": 236
},
{
"epoch": 0.039862080565133295,
"grad_norm": NaN,
"learning_rate": 9.986450738827646e-05,
"loss": 0.0,
"step": 237
},
{
"epoch": 0.04003027499789757,
"grad_norm": NaN,
"learning_rate": 9.986252308522101e-05,
"loss": 0.0,
"step": 238
},
{
"epoch": 0.04019846943066185,
"grad_norm": NaN,
"learning_rate": 9.986052437744115e-05,
"loss": 0.0,
"step": 239
}
],
"logging_steps": 1,
"max_steps": 5945,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 239,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4501042330337280.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
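
The JSON above is a Hugging Face Trainer state dump (typically saved as `trainer_state.json` inside a checkpoint directory). Below is a minimal sketch of loading it and summarizing the logged steps; the `STATE_PATH` value is a hypothetical path, not taken from the checkpoint, and the NaN/zero-loss counts simply summarize what is already visible in the log entries above.

```python
import json
import math

# Hypothetical path; point this at the actual checkpoint directory.
STATE_PATH = "checkpoint-239/trainer_state.json"

with open(STATE_PATH) as f:
    # Python's json module accepts the non-standard NaN token that
    # json.dump emits for float("nan"), so grad_norm parses as a float.
    state = json.load(f)

logs = state["log_history"]
print(f"global_step={state['global_step']}, epoch={state['epoch']:.5f}, "
      f"max_steps={state['max_steps']}, batch_size={state['train_batch_size']}")

# Count logged steps with a NaN gradient norm or a loss of exactly 0.0 --
# in this log both occur from the earliest steps, which is often a sign
# that training has diverged rather than progressed.
nan_grad_steps = [e["step"] for e in logs if math.isnan(e.get("grad_norm", 0.0))]
zero_loss_steps = [e["step"] for e in logs if e.get("loss") == 0.0]
print(f"{len(nan_grad_steps)}/{len(logs)} logged steps have NaN grad_norm")
print(f"{len(zero_loss_steps)}/{len(logs)} logged steps have loss == 0.0")
```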