samantha-1.2-mistral-7b / trainer_state.json
ehartford's picture
Upload folder using huggingface_hub
13c50e3
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 34.0,
"eval_steps": 10,
"global_step": 187,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 0.0,
"loss": 2.0388,
"step": 1
},
{
"epoch": 0.18,
"eval_loss": 14.199334144592285,
"eval_runtime": 1.4513,
"eval_samples_per_second": 45.477,
"eval_steps_per_second": 2.067,
"step": 1
},
{
"epoch": 0.36,
"learning_rate": 6.000000000000001e-08,
"loss": 11.2905,
"step": 2
},
{
"epoch": 0.55,
"learning_rate": 1.2000000000000002e-07,
"loss": 11.3574,
"step": 3
},
{
"epoch": 0.73,
"learning_rate": 1.8e-07,
"loss": 10.3132,
"step": 4
},
{
"epoch": 0.91,
"learning_rate": 2.4000000000000003e-07,
"loss": 8.2949,
"step": 5
},
{
"epoch": 1.09,
"learning_rate": 3.0000000000000004e-07,
"loss": 5.3189,
"step": 6
},
{
"epoch": 1.27,
"learning_rate": 3.6e-07,
"loss": 2.2121,
"step": 7
},
{
"epoch": 1.45,
"learning_rate": 4.2000000000000006e-07,
"loss": 2.0155,
"step": 8
},
{
"epoch": 1.64,
"learning_rate": 4.800000000000001e-07,
"loss": 1.9936,
"step": 9
},
{
"epoch": 1.82,
"learning_rate": 5.4e-07,
"loss": 1.8803,
"step": 10
},
{
"epoch": 1.82,
"eval_loss": 2.1710054874420166,
"eval_runtime": 1.463,
"eval_samples_per_second": 45.113,
"eval_steps_per_second": 2.051,
"step": 10
},
{
"epoch": 2.0,
"learning_rate": 6.000000000000001e-07,
"loss": 1.8794,
"step": 11
},
{
"epoch": 2.18,
"learning_rate": 6.6e-07,
"loss": 1.884,
"step": 12
},
{
"epoch": 2.36,
"learning_rate": 7.2e-07,
"loss": 1.8201,
"step": 13
},
{
"epoch": 2.55,
"learning_rate": 7.8e-07,
"loss": 1.8356,
"step": 14
},
{
"epoch": 2.73,
"learning_rate": 8.400000000000001e-07,
"loss": 1.7696,
"step": 15
},
{
"epoch": 2.91,
"learning_rate": 9e-07,
"loss": 1.6433,
"step": 16
},
{
"epoch": 3.09,
"learning_rate": 9.600000000000001e-07,
"loss": 1.5564,
"step": 17
},
{
"epoch": 3.27,
"learning_rate": 1.0200000000000002e-06,
"loss": 1.4976,
"step": 18
},
{
"epoch": 3.45,
"learning_rate": 1.08e-06,
"loss": 1.4484,
"step": 19
},
{
"epoch": 3.64,
"learning_rate": 1.14e-06,
"loss": 1.4277,
"step": 20
},
{
"epoch": 3.64,
"eval_loss": 1.5980714559555054,
"eval_runtime": 1.4588,
"eval_samples_per_second": 45.243,
"eval_steps_per_second": 2.056,
"step": 20
},
{
"epoch": 3.82,
"learning_rate": 1.2000000000000002e-06,
"loss": 1.4004,
"step": 21
},
{
"epoch": 4.0,
"learning_rate": 1.26e-06,
"loss": 1.3963,
"step": 22
},
{
"epoch": 4.18,
"learning_rate": 1.32e-06,
"loss": 1.3915,
"step": 23
},
{
"epoch": 4.36,
"learning_rate": 1.3800000000000001e-06,
"loss": 1.3803,
"step": 24
},
{
"epoch": 4.55,
"learning_rate": 1.44e-06,
"loss": 1.3711,
"step": 25
},
{
"epoch": 4.73,
"learning_rate": 1.5e-06,
"loss": 1.3596,
"step": 26
},
{
"epoch": 4.91,
"learning_rate": 1.56e-06,
"loss": 1.3376,
"step": 27
},
{
"epoch": 5.09,
"learning_rate": 1.6200000000000002e-06,
"loss": 1.3241,
"step": 28
},
{
"epoch": 5.27,
"learning_rate": 1.6800000000000002e-06,
"loss": 1.2975,
"step": 29
},
{
"epoch": 5.45,
"learning_rate": 1.7399999999999999e-06,
"loss": 1.2847,
"step": 30
},
{
"epoch": 5.45,
"eval_loss": 1.4864555597305298,
"eval_runtime": 1.4609,
"eval_samples_per_second": 45.176,
"eval_steps_per_second": 2.053,
"step": 30
},
{
"epoch": 5.64,
"learning_rate": 1.8e-06,
"loss": 1.2798,
"step": 31
},
{
"epoch": 5.82,
"learning_rate": 1.86e-06,
"loss": 1.2616,
"step": 32
},
{
"epoch": 6.0,
"learning_rate": 1.9200000000000003e-06,
"loss": 1.252,
"step": 33
},
{
"epoch": 6.18,
"learning_rate": 1.98e-06,
"loss": 1.2398,
"step": 34
},
{
"epoch": 6.36,
"learning_rate": 2.0400000000000004e-06,
"loss": 1.2229,
"step": 35
},
{
"epoch": 6.55,
"learning_rate": 2.1e-06,
"loss": 1.2221,
"step": 36
},
{
"epoch": 6.73,
"learning_rate": 2.16e-06,
"loss": 1.2046,
"step": 37
},
{
"epoch": 6.91,
"learning_rate": 2.22e-06,
"loss": 1.1908,
"step": 38
},
{
"epoch": 7.09,
"learning_rate": 2.28e-06,
"loss": 1.1916,
"step": 39
},
{
"epoch": 7.27,
"learning_rate": 2.34e-06,
"loss": 1.1733,
"step": 40
},
{
"epoch": 7.27,
"eval_loss": 1.37030827999115,
"eval_runtime": 1.4628,
"eval_samples_per_second": 45.118,
"eval_steps_per_second": 2.051,
"step": 40
},
{
"epoch": 7.45,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.1708,
"step": 41
},
{
"epoch": 7.64,
"learning_rate": 2.4599999999999997e-06,
"loss": 1.1727,
"step": 42
},
{
"epoch": 7.82,
"learning_rate": 2.52e-06,
"loss": 1.1615,
"step": 43
},
{
"epoch": 8.0,
"learning_rate": 2.58e-06,
"loss": 1.1529,
"step": 44
},
{
"epoch": 8.18,
"learning_rate": 2.64e-06,
"loss": 1.1428,
"step": 45
},
{
"epoch": 8.36,
"learning_rate": 2.7e-06,
"loss": 1.1398,
"step": 46
},
{
"epoch": 8.55,
"learning_rate": 2.7600000000000003e-06,
"loss": 1.1392,
"step": 47
},
{
"epoch": 8.73,
"learning_rate": 2.82e-06,
"loss": 1.121,
"step": 48
},
{
"epoch": 8.91,
"learning_rate": 2.88e-06,
"loss": 1.1239,
"step": 49
},
{
"epoch": 9.09,
"learning_rate": 2.9400000000000002e-06,
"loss": 1.1131,
"step": 50
},
{
"epoch": 9.09,
"eval_loss": 1.3062629699707031,
"eval_runtime": 1.4506,
"eval_samples_per_second": 45.499,
"eval_steps_per_second": 2.068,
"step": 50
},
{
"epoch": 9.27,
"learning_rate": 3e-06,
"loss": 1.1047,
"step": 51
},
{
"epoch": 9.45,
"learning_rate": 3.06e-06,
"loss": 1.1012,
"step": 52
},
{
"epoch": 9.64,
"learning_rate": 3.12e-06,
"loss": 1.0987,
"step": 53
},
{
"epoch": 9.82,
"learning_rate": 3.18e-06,
"loss": 1.1012,
"step": 54
},
{
"epoch": 10.0,
"learning_rate": 3.2400000000000003e-06,
"loss": 1.093,
"step": 55
},
{
"epoch": 10.18,
"learning_rate": 3.3e-06,
"loss": 1.0876,
"step": 56
},
{
"epoch": 10.36,
"learning_rate": 3.3600000000000004e-06,
"loss": 1.0813,
"step": 57
},
{
"epoch": 10.55,
"learning_rate": 3.42e-06,
"loss": 1.0798,
"step": 58
},
{
"epoch": 10.73,
"learning_rate": 3.4799999999999997e-06,
"loss": 1.0673,
"step": 59
},
{
"epoch": 10.91,
"learning_rate": 3.54e-06,
"loss": 1.0616,
"step": 60
},
{
"epoch": 10.91,
"eval_loss": 1.2495708465576172,
"eval_runtime": 1.4605,
"eval_samples_per_second": 45.19,
"eval_steps_per_second": 2.054,
"step": 60
},
{
"epoch": 11.09,
"learning_rate": 3.6e-06,
"loss": 1.0639,
"step": 61
},
{
"epoch": 11.27,
"learning_rate": 3.66e-06,
"loss": 1.0538,
"step": 62
},
{
"epoch": 11.45,
"learning_rate": 3.72e-06,
"loss": 1.0567,
"step": 63
},
{
"epoch": 11.64,
"learning_rate": 3.7800000000000002e-06,
"loss": 1.0465,
"step": 64
},
{
"epoch": 11.82,
"learning_rate": 3.8400000000000005e-06,
"loss": 1.0485,
"step": 65
},
{
"epoch": 12.0,
"learning_rate": 3.9e-06,
"loss": 1.0494,
"step": 66
},
{
"epoch": 12.18,
"learning_rate": 3.96e-06,
"loss": 1.0359,
"step": 67
},
{
"epoch": 12.36,
"learning_rate": 4.0200000000000005e-06,
"loss": 1.0336,
"step": 68
},
{
"epoch": 12.55,
"learning_rate": 4.080000000000001e-06,
"loss": 1.0317,
"step": 69
},
{
"epoch": 12.73,
"learning_rate": 4.14e-06,
"loss": 1.0302,
"step": 70
},
{
"epoch": 12.73,
"eval_loss": 1.2099440097808838,
"eval_runtime": 1.4611,
"eval_samples_per_second": 45.172,
"eval_steps_per_second": 2.053,
"step": 70
},
{
"epoch": 12.91,
"learning_rate": 4.2e-06,
"loss": 1.0249,
"step": 71
},
{
"epoch": 13.09,
"learning_rate": 4.26e-06,
"loss": 1.022,
"step": 72
},
{
"epoch": 13.27,
"learning_rate": 4.32e-06,
"loss": 1.0138,
"step": 73
},
{
"epoch": 13.45,
"learning_rate": 4.38e-06,
"loss": 1.0136,
"step": 74
},
{
"epoch": 13.64,
"learning_rate": 4.44e-06,
"loss": 1.0169,
"step": 75
},
{
"epoch": 13.82,
"learning_rate": 4.5e-06,
"loss": 1.0041,
"step": 76
},
{
"epoch": 14.0,
"learning_rate": 4.56e-06,
"loss": 0.9956,
"step": 77
},
{
"epoch": 14.18,
"learning_rate": 4.62e-06,
"loss": 0.9941,
"step": 78
},
{
"epoch": 14.36,
"learning_rate": 4.68e-06,
"loss": 0.9963,
"step": 79
},
{
"epoch": 14.55,
"learning_rate": 4.74e-06,
"loss": 0.9866,
"step": 80
},
{
"epoch": 14.55,
"eval_loss": 1.1811805963516235,
"eval_runtime": 1.4593,
"eval_samples_per_second": 45.228,
"eval_steps_per_second": 2.056,
"step": 80
},
{
"epoch": 14.73,
"learning_rate": 4.800000000000001e-06,
"loss": 0.9904,
"step": 81
},
{
"epoch": 14.91,
"learning_rate": 4.86e-06,
"loss": 0.9849,
"step": 82
},
{
"epoch": 15.09,
"learning_rate": 4.9199999999999995e-06,
"loss": 0.9784,
"step": 83
},
{
"epoch": 15.27,
"learning_rate": 4.98e-06,
"loss": 0.9801,
"step": 84
},
{
"epoch": 15.45,
"learning_rate": 5.04e-06,
"loss": 0.9749,
"step": 85
},
{
"epoch": 15.64,
"learning_rate": 5.1e-06,
"loss": 0.9667,
"step": 86
},
{
"epoch": 15.82,
"learning_rate": 5.16e-06,
"loss": 0.9675,
"step": 87
},
{
"epoch": 16.0,
"learning_rate": 5.22e-06,
"loss": 0.9666,
"step": 88
},
{
"epoch": 16.18,
"learning_rate": 5.28e-06,
"loss": 0.9582,
"step": 89
},
{
"epoch": 16.36,
"learning_rate": 5.3400000000000005e-06,
"loss": 0.9594,
"step": 90
},
{
"epoch": 16.36,
"eval_loss": 1.1599087715148926,
"eval_runtime": 1.4615,
"eval_samples_per_second": 45.16,
"eval_steps_per_second": 2.053,
"step": 90
},
{
"epoch": 16.55,
"learning_rate": 5.4e-06,
"loss": 0.9462,
"step": 91
},
{
"epoch": 16.73,
"learning_rate": 5.46e-06,
"loss": 0.952,
"step": 92
},
{
"epoch": 16.91,
"learning_rate": 5.5200000000000005e-06,
"loss": 0.9507,
"step": 93
},
{
"epoch": 17.09,
"learning_rate": 5.580000000000001e-06,
"loss": 0.9535,
"step": 94
},
{
"epoch": 17.27,
"learning_rate": 5.64e-06,
"loss": 0.9436,
"step": 95
},
{
"epoch": 17.45,
"learning_rate": 5.7e-06,
"loss": 0.9338,
"step": 96
},
{
"epoch": 17.64,
"learning_rate": 5.76e-06,
"loss": 0.9355,
"step": 97
},
{
"epoch": 17.82,
"learning_rate": 5.82e-06,
"loss": 0.9355,
"step": 98
},
{
"epoch": 18.0,
"learning_rate": 5.8800000000000005e-06,
"loss": 0.9343,
"step": 99
},
{
"epoch": 18.18,
"learning_rate": 5.94e-06,
"loss": 0.9161,
"step": 100
},
{
"epoch": 18.18,
"eval_loss": 1.1503610610961914,
"eval_runtime": 1.4537,
"eval_samples_per_second": 45.4,
"eval_steps_per_second": 2.064,
"step": 100
},
{
"epoch": 18.36,
"learning_rate": 6e-06,
"loss": 0.9156,
"step": 101
},
{
"epoch": 18.55,
"learning_rate": 5.94e-06,
"loss": 0.9185,
"step": 102
},
{
"epoch": 18.73,
"learning_rate": 5.8800000000000005e-06,
"loss": 0.9155,
"step": 103
},
{
"epoch": 18.91,
"learning_rate": 5.82e-06,
"loss": 0.9114,
"step": 104
},
{
"epoch": 19.09,
"learning_rate": 5.76e-06,
"loss": 0.8976,
"step": 105
},
{
"epoch": 19.27,
"learning_rate": 5.7e-06,
"loss": 0.8985,
"step": 106
},
{
"epoch": 19.45,
"learning_rate": 5.64e-06,
"loss": 0.8972,
"step": 107
},
{
"epoch": 19.64,
"learning_rate": 5.580000000000001e-06,
"loss": 0.8961,
"step": 108
},
{
"epoch": 19.82,
"learning_rate": 5.5200000000000005e-06,
"loss": 0.8948,
"step": 109
},
{
"epoch": 20.0,
"learning_rate": 5.46e-06,
"loss": 0.8823,
"step": 110
},
{
"epoch": 20.0,
"eval_loss": 1.13896906375885,
"eval_runtime": 1.4633,
"eval_samples_per_second": 45.103,
"eval_steps_per_second": 2.05,
"step": 110
},
{
"epoch": 20.18,
"learning_rate": 5.4e-06,
"loss": 0.8689,
"step": 111
},
{
"epoch": 20.36,
"learning_rate": 5.3400000000000005e-06,
"loss": 0.8832,
"step": 112
},
{
"epoch": 20.55,
"learning_rate": 5.28e-06,
"loss": 0.875,
"step": 113
},
{
"epoch": 20.73,
"learning_rate": 5.22e-06,
"loss": 0.8655,
"step": 114
},
{
"epoch": 20.91,
"learning_rate": 5.16e-06,
"loss": 0.8695,
"step": 115
},
{
"epoch": 21.09,
"learning_rate": 5.1e-06,
"loss": 0.8636,
"step": 116
},
{
"epoch": 21.27,
"learning_rate": 5.04e-06,
"loss": 0.8513,
"step": 117
},
{
"epoch": 21.45,
"learning_rate": 4.98e-06,
"loss": 0.8523,
"step": 118
},
{
"epoch": 21.64,
"learning_rate": 4.9199999999999995e-06,
"loss": 0.8497,
"step": 119
},
{
"epoch": 21.82,
"learning_rate": 4.86e-06,
"loss": 0.8441,
"step": 120
},
{
"epoch": 21.82,
"eval_loss": 1.1353310346603394,
"eval_runtime": 1.4613,
"eval_samples_per_second": 45.164,
"eval_steps_per_second": 2.053,
"step": 120
},
{
"epoch": 22.0,
"learning_rate": 4.800000000000001e-06,
"loss": 0.8543,
"step": 121
},
{
"epoch": 22.18,
"learning_rate": 4.74e-06,
"loss": 0.838,
"step": 122
},
{
"epoch": 22.36,
"learning_rate": 4.68e-06,
"loss": 0.8348,
"step": 123
},
{
"epoch": 22.55,
"learning_rate": 4.62e-06,
"loss": 0.8215,
"step": 124
},
{
"epoch": 22.73,
"learning_rate": 4.56e-06,
"loss": 0.8313,
"step": 125
},
{
"epoch": 22.91,
"learning_rate": 4.5e-06,
"loss": 0.8303,
"step": 126
},
{
"epoch": 23.09,
"learning_rate": 4.44e-06,
"loss": 0.8188,
"step": 127
},
{
"epoch": 23.27,
"learning_rate": 4.38e-06,
"loss": 0.812,
"step": 128
},
{
"epoch": 23.45,
"learning_rate": 4.32e-06,
"loss": 0.8077,
"step": 129
},
{
"epoch": 23.64,
"learning_rate": 4.26e-06,
"loss": 0.7956,
"step": 130
},
{
"epoch": 23.64,
"eval_loss": 1.1432788372039795,
"eval_runtime": 1.461,
"eval_samples_per_second": 45.176,
"eval_steps_per_second": 2.053,
"step": 130
},
{
"epoch": 23.82,
"learning_rate": 4.2e-06,
"loss": 0.8099,
"step": 131
},
{
"epoch": 24.0,
"learning_rate": 4.14e-06,
"loss": 0.8223,
"step": 132
},
{
"epoch": 24.18,
"learning_rate": 4.080000000000001e-06,
"loss": 0.7956,
"step": 133
},
{
"epoch": 24.36,
"learning_rate": 4.0200000000000005e-06,
"loss": 0.7889,
"step": 134
},
{
"epoch": 24.55,
"learning_rate": 3.96e-06,
"loss": 0.7844,
"step": 135
},
{
"epoch": 24.73,
"learning_rate": 3.9e-06,
"loss": 0.7867,
"step": 136
},
{
"epoch": 24.91,
"learning_rate": 3.8400000000000005e-06,
"loss": 0.7875,
"step": 137
},
{
"epoch": 25.09,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.7804,
"step": 138
},
{
"epoch": 25.27,
"learning_rate": 3.72e-06,
"loss": 0.7661,
"step": 139
},
{
"epoch": 25.45,
"learning_rate": 3.66e-06,
"loss": 0.7696,
"step": 140
},
{
"epoch": 25.45,
"eval_loss": 1.1572602987289429,
"eval_runtime": 1.4603,
"eval_samples_per_second": 45.197,
"eval_steps_per_second": 2.054,
"step": 140
},
{
"epoch": 25.64,
"learning_rate": 3.6e-06,
"loss": 0.7531,
"step": 141
},
{
"epoch": 25.82,
"learning_rate": 3.54e-06,
"loss": 0.7538,
"step": 142
},
{
"epoch": 26.0,
"learning_rate": 3.4799999999999997e-06,
"loss": 0.7574,
"step": 143
},
{
"epoch": 26.18,
"learning_rate": 3.42e-06,
"loss": 0.746,
"step": 144
},
{
"epoch": 26.36,
"learning_rate": 3.3600000000000004e-06,
"loss": 0.742,
"step": 145
},
{
"epoch": 26.55,
"learning_rate": 3.3e-06,
"loss": 0.7371,
"step": 146
},
{
"epoch": 26.73,
"learning_rate": 3.2400000000000003e-06,
"loss": 0.7355,
"step": 147
},
{
"epoch": 26.91,
"learning_rate": 3.18e-06,
"loss": 0.7352,
"step": 148
},
{
"epoch": 27.09,
"learning_rate": 3.12e-06,
"loss": 0.7263,
"step": 149
},
{
"epoch": 27.27,
"learning_rate": 3.06e-06,
"loss": 0.7113,
"step": 150
},
{
"epoch": 27.27,
"eval_loss": 1.1966105699539185,
"eval_runtime": 1.4607,
"eval_samples_per_second": 45.185,
"eval_steps_per_second": 2.054,
"step": 150
},
{
"epoch": 27.45,
"learning_rate": 3e-06,
"loss": 0.7142,
"step": 151
},
{
"epoch": 27.64,
"learning_rate": 2.9400000000000002e-06,
"loss": 0.7189,
"step": 152
},
{
"epoch": 27.82,
"learning_rate": 2.88e-06,
"loss": 0.7131,
"step": 153
},
{
"epoch": 28.0,
"learning_rate": 2.82e-06,
"loss": 0.7054,
"step": 154
},
{
"epoch": 28.18,
"learning_rate": 2.7600000000000003e-06,
"loss": 0.6965,
"step": 155
},
{
"epoch": 28.36,
"learning_rate": 2.7e-06,
"loss": 0.6879,
"step": 156
},
{
"epoch": 28.55,
"learning_rate": 2.64e-06,
"loss": 0.6872,
"step": 157
},
{
"epoch": 28.73,
"learning_rate": 2.58e-06,
"loss": 0.6909,
"step": 158
},
{
"epoch": 28.91,
"learning_rate": 2.52e-06,
"loss": 0.6734,
"step": 159
},
{
"epoch": 29.09,
"learning_rate": 2.4599999999999997e-06,
"loss": 0.6646,
"step": 160
},
{
"epoch": 29.09,
"eval_loss": 1.2236099243164062,
"eval_runtime": 1.4503,
"eval_samples_per_second": 45.508,
"eval_steps_per_second": 2.069,
"step": 160
},
{
"epoch": 29.27,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.6715,
"step": 161
},
{
"epoch": 29.45,
"learning_rate": 2.34e-06,
"loss": 0.6621,
"step": 162
},
{
"epoch": 29.64,
"learning_rate": 2.28e-06,
"loss": 0.6483,
"step": 163
},
{
"epoch": 29.82,
"learning_rate": 2.22e-06,
"loss": 0.6487,
"step": 164
},
{
"epoch": 30.0,
"learning_rate": 2.16e-06,
"loss": 0.6553,
"step": 165
},
{
"epoch": 30.18,
"learning_rate": 2.1e-06,
"loss": 0.6344,
"step": 166
},
{
"epoch": 30.36,
"learning_rate": 2.0400000000000004e-06,
"loss": 0.6334,
"step": 167
},
{
"epoch": 30.55,
"learning_rate": 1.98e-06,
"loss": 0.6311,
"step": 168
},
{
"epoch": 30.73,
"learning_rate": 1.9200000000000003e-06,
"loss": 0.6262,
"step": 169
},
{
"epoch": 30.91,
"learning_rate": 1.86e-06,
"loss": 0.6303,
"step": 170
},
{
"epoch": 30.91,
"eval_loss": 1.2599583864212036,
"eval_runtime": 1.4611,
"eval_samples_per_second": 45.171,
"eval_steps_per_second": 2.053,
"step": 170
},
{
"epoch": 31.09,
"learning_rate": 1.8e-06,
"loss": 0.6196,
"step": 171
},
{
"epoch": 31.27,
"learning_rate": 1.7399999999999999e-06,
"loss": 0.5957,
"step": 172
},
{
"epoch": 31.45,
"learning_rate": 1.6800000000000002e-06,
"loss": 0.5994,
"step": 173
},
{
"epoch": 31.64,
"learning_rate": 1.6200000000000002e-06,
"loss": 0.6109,
"step": 174
},
{
"epoch": 31.82,
"learning_rate": 1.56e-06,
"loss": 0.61,
"step": 175
},
{
"epoch": 32.0,
"learning_rate": 1.5e-06,
"loss": 0.5957,
"step": 176
},
{
"epoch": 32.18,
"learning_rate": 1.44e-06,
"loss": 0.5862,
"step": 177
},
{
"epoch": 32.36,
"learning_rate": 1.3800000000000001e-06,
"loss": 0.5799,
"step": 178
},
{
"epoch": 32.55,
"learning_rate": 1.32e-06,
"loss": 0.5788,
"step": 179
},
{
"epoch": 32.73,
"learning_rate": 1.26e-06,
"loss": 0.5741,
"step": 180
},
{
"epoch": 32.73,
"eval_loss": 1.3407286405563354,
"eval_runtime": 1.4616,
"eval_samples_per_second": 45.157,
"eval_steps_per_second": 2.053,
"step": 180
},
{
"epoch": 32.91,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.5745,
"step": 181
},
{
"epoch": 33.09,
"learning_rate": 1.14e-06,
"loss": 0.5654,
"step": 182
},
{
"epoch": 33.27,
"learning_rate": 1.08e-06,
"loss": 0.5587,
"step": 183
},
{
"epoch": 33.45,
"learning_rate": 1.0200000000000002e-06,
"loss": 0.5539,
"step": 184
},
{
"epoch": 33.64,
"learning_rate": 9.600000000000001e-07,
"loss": 0.558,
"step": 185
},
{
"epoch": 33.82,
"learning_rate": 9e-07,
"loss": 0.5571,
"step": 186
},
{
"epoch": 34.0,
"learning_rate": 8.400000000000001e-07,
"loss": 0.5496,
"step": 187
}
],
"logging_steps": 1,
"max_steps": 200,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 6.274288047202763e+18,
"trial_name": null,
"trial_params": null
}