File size: 9,411 Bytes
7e36054 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.957345971563981,
"eval_steps": 100,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"grad_norm": 132.97353908293687,
"learning_rate": 3.125e-08,
"logits/chosen": 123.11854553222656,
"logits/rejected": 97.00198364257812,
"logps/chosen": -425.18585205078125,
"logps/rejected": -424.1869201660156,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.19,
"grad_norm": 206.0883100010928,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": 117.39097595214844,
"logits/rejected": 136.3163299560547,
"logps/chosen": -442.6399230957031,
"logps/rejected": -524.91015625,
"loss": 0.7186,
"rewards/accuracies": 0.4930555522441864,
"rewards/chosen": 0.037425246089696884,
"rewards/margins": 0.07718456536531448,
"rewards/rejected": -0.0397593155503273,
"step": 10
},
{
"epoch": 0.38,
"grad_norm": 114.8435303205146,
"learning_rate": 4.989935734988097e-07,
"logits/chosen": 125.3319091796875,
"logits/rejected": 132.9754638671875,
"logps/chosen": -422.8042907714844,
"logps/rejected": -491.63226318359375,
"loss": 0.6164,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.17301546037197113,
"rewards/margins": 0.379099041223526,
"rewards/rejected": -0.20608356595039368,
"step": 20
},
{
"epoch": 0.57,
"grad_norm": 99.27143207986335,
"learning_rate": 4.877641290737883e-07,
"logits/chosen": 122.47686767578125,
"logits/rejected": 125.91865539550781,
"logps/chosen": -466.9618225097656,
"logps/rejected": -540.3817138671875,
"loss": 0.5813,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.5920838117599487,
"rewards/margins": 1.0662639141082764,
"rewards/rejected": -1.658347725868225,
"step": 30
},
{
"epoch": 0.76,
"grad_norm": 109.14521515462766,
"learning_rate": 4.646121984004665e-07,
"logits/chosen": 124.97059631347656,
"logits/rejected": 119.9173583984375,
"logps/chosen": -497.7147521972656,
"logps/rejected": -527.3887939453125,
"loss": 0.5426,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.2266700267791748,
"rewards/margins": 0.9045358896255493,
"rewards/rejected": -2.1312055587768555,
"step": 40
},
{
"epoch": 0.95,
"grad_norm": 115.6113002085735,
"learning_rate": 4.3069871595684787e-07,
"logits/chosen": 132.8910369873047,
"logits/rejected": 133.22190856933594,
"logps/chosen": -520.63037109375,
"logps/rejected": -549.1149291992188,
"loss": 0.5202,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.8903814554214478,
"rewards/margins": 1.0029468536376953,
"rewards/rejected": -2.8933284282684326,
"step": 50
},
{
"epoch": 1.14,
"grad_norm": 46.9650475313439,
"learning_rate": 3.877242453630256e-07,
"logits/chosen": 131.47854614257812,
"logits/rejected": 134.71681213378906,
"logps/chosen": -481.8072814941406,
"logps/rejected": -534.0516357421875,
"loss": 0.2837,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -1.2520891427993774,
"rewards/margins": 2.2355263233184814,
"rewards/rejected": -3.4876155853271484,
"step": 60
},
{
"epoch": 1.33,
"grad_norm": 47.26485069523079,
"learning_rate": 3.378437060203357e-07,
"logits/chosen": 126.1490707397461,
"logits/rejected": 126.75111389160156,
"logps/chosen": -452.6795349121094,
"logps/rejected": -579.5133056640625,
"loss": 0.1756,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -1.188291072845459,
"rewards/margins": 2.6805100440979004,
"rewards/rejected": -3.868800640106201,
"step": 70
},
{
"epoch": 1.52,
"grad_norm": 46.43874254029814,
"learning_rate": 2.8355831645441387e-07,
"logits/chosen": 127.46858978271484,
"logits/rejected": 128.4056396484375,
"logps/chosen": -514.4637451171875,
"logps/rejected": -621.2301635742188,
"loss": 0.1711,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -1.268179178237915,
"rewards/margins": 3.4565296173095703,
"rewards/rejected": -4.724708557128906,
"step": 80
},
{
"epoch": 1.71,
"grad_norm": 49.11808633093636,
"learning_rate": 2.2759017277414164e-07,
"logits/chosen": 112.5447998046875,
"logits/rejected": 114.98893737792969,
"logps/chosen": -497.70001220703125,
"logps/rejected": -589.730224609375,
"loss": 0.1524,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.7766218185424805,
"rewards/margins": 3.491291046142578,
"rewards/rejected": -5.2679123878479,
"step": 90
},
{
"epoch": 1.9,
"grad_norm": 47.47224806448749,
"learning_rate": 1.7274575140626315e-07,
"logits/chosen": 124.581787109375,
"logits/rejected": 115.68563079833984,
"logps/chosen": -516.1900634765625,
"logps/rejected": -632.6817626953125,
"loss": 0.1623,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.8899990320205688,
"rewards/margins": 3.6050896644592285,
"rewards/rejected": -5.495089054107666,
"step": 100
},
{
"epoch": 1.9,
"eval_logits/chosen": 93.8502197265625,
"eval_logits/rejected": 87.7247543334961,
"eval_logps/chosen": -512.8825073242188,
"eval_logps/rejected": -541.5043334960938,
"eval_loss": 0.48611319065093994,
"eval_rewards/accuracies": 0.6770833134651184,
"eval_rewards/chosen": -2.9739017486572266,
"eval_rewards/margins": 1.5238369703292847,
"eval_rewards/rejected": -4.497739315032959,
"eval_runtime": 53.4905,
"eval_samples_per_second": 14.021,
"eval_steps_per_second": 0.449,
"step": 100
},
{
"epoch": 2.09,
"grad_norm": 25.494387206609645,
"learning_rate": 1.2177518064852348e-07,
"logits/chosen": 102.986083984375,
"logits/rejected": 116.60546875,
"logps/chosen": -538.074951171875,
"logps/rejected": -667.3218383789062,
"loss": 0.1318,
"rewards/accuracies": 0.96875,
"rewards/chosen": -2.1493353843688965,
"rewards/margins": 3.4991326332092285,
"rewards/rejected": -5.648468017578125,
"step": 110
},
{
"epoch": 2.27,
"grad_norm": 24.60483354043265,
"learning_rate": 7.723433775328384e-08,
"logits/chosen": 113.220703125,
"logits/rejected": 114.29705810546875,
"logps/chosen": -522.1823120117188,
"logps/rejected": -628.0721435546875,
"loss": 0.0837,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.2452592849731445,
"rewards/margins": 3.996518611907959,
"rewards/rejected": -6.2417778968811035,
"step": 120
},
{
"epoch": 2.46,
"grad_norm": 32.75955455536007,
"learning_rate": 4.1356686569674335e-08,
"logits/chosen": 115.95035552978516,
"logits/rejected": 120.65645599365234,
"logps/chosen": -537.8087158203125,
"logps/rejected": -653.7862548828125,
"loss": 0.0781,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.2025058269500732,
"rewards/margins": 4.200939178466797,
"rewards/rejected": -6.403443813323975,
"step": 130
},
{
"epoch": 2.65,
"grad_norm": 23.375967561613557,
"learning_rate": 1.5941282340065697e-08,
"logits/chosen": 101.51383972167969,
"logits/rejected": 102.2659683227539,
"logps/chosen": -499.16229248046875,
"logps/rejected": -645.9388427734375,
"loss": 0.0791,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -2.6822938919067383,
"rewards/margins": 4.259942054748535,
"rewards/rejected": -6.942234992980957,
"step": 140
},
{
"epoch": 2.84,
"grad_norm": 27.725216044545164,
"learning_rate": 2.2625595580163247e-09,
"logits/chosen": 108.08512878417969,
"logits/rejected": 121.6434097290039,
"logps/chosen": -524.5687866210938,
"logps/rejected": -647.0615844726562,
"loss": 0.079,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -2.486959457397461,
"rewards/margins": 4.458805084228516,
"rewards/rejected": -6.945765018463135,
"step": 150
},
{
"epoch": 2.96,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.28389122929328525,
"train_runtime": 1811.0132,
"train_samples_per_second": 11.182,
"train_steps_per_second": 0.086
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|