AlekseyKorshuk commited on
Commit
e118768
1 Parent(s): 0c9afed

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/19ghjsta/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3the5qdy) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3the5qdy/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1qveqjla/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/21tohu9l) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/21tohu9l/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -35,7 +35,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.11.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
35
  }
36
  },
37
  "torch_dtype": "float32",
38
+ "transformers_version": "4.11.3",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 0.8179315328598022, "eval_runtime": 7.3257, "eval_samples_per_second": 21.158, "eval_steps_per_second": 2.73, "epoch": 3.0}
 
1
+ {"eval_loss": 0.8691701889038086, "eval_runtime": 5.5874, "eval_samples_per_second": 22.014, "eval_steps_per_second": 2.864, "epoch": 4.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bfe735b97d1edf92f1881ff486f7bbdb5e1c909ebc2fe3797e20c998d462eef
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dcb9fd0c95e49a7c5c6a915f2d7adaa973be6d1ae19ff9baad7139b0524ce2b
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c09c40bf4dfd5fd306d702098d94d3bbdc47999df4e071987a459220d393d0d
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b9057c2e701481d1224ab71cb3e30e9a63129f8298087d242e9dbef4d013a3c
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e79396262fb67544ae8b77d0955613d7d836ace7dac0bfc78d519268a6d4270c
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff17a1d5c7cb31c35913396acbea4856780b0256c64b1a8d4aaee9270629fc7
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad6fe78f0f512530d4eb0b75148f257776f4270926c8eeb3605dc42ed450ad9f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ebc7a18dd93d6b68e430a5a307587231b2cf1f61199f2bfb8080060d317195
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:592ef97b2b907edd507cff14ca8b3cd3c3eaddc58fa33aedd4b1cf49372e69c1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b46773a8e0da9899f0332e86140882baf8db0e4d332cd8d2b17d122dbb838867
3
  size 623
trainer_state.json CHANGED
@@ -1,388 +1,272 @@
1
  {
2
- "best_metric": 0.8179315328598022,
3
- "best_model_checkpoint": "output/morgenshtern/checkpoint-291",
4
- "epoch": 3.0,
5
- "global_step": 291,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
- "learning_rate": 2.8353852816851834e-06,
13
- "loss": 0.9329,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.1,
18
- "learning_rate": 5.632050517253893e-07,
19
- "loss": 1.0399,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.15,
24
- "learning_rate": 3.5245568632818114e-08,
25
- "loss": 0.9758,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.2,
30
- "learning_rate": 1.2650418304129032e-06,
31
- "loss": 1.1379,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 0.26,
36
- "learning_rate": 4.2210662473863345e-06,
37
- "loss": 1.0053,
38
  "step": 25
39
  },
40
  {
41
- "epoch": 0.31,
42
- "learning_rate": 8.827536897135236e-06,
43
- "loss": 0.9426,
44
  "step": 30
45
  },
46
  {
47
- "epoch": 0.36,
48
- "learning_rate": 1.4966360302693292e-05,
49
- "loss": 0.975,
50
  "step": 35
51
  },
52
  {
53
- "epoch": 0.41,
54
- "learning_rate": 2.2480158928073662e-05,
55
- "loss": 0.9562,
56
  "step": 40
57
  },
58
  {
59
- "epoch": 0.46,
60
- "learning_rate": 3.1176305776956165e-05,
61
- "loss": 0.9848,
62
  "step": 45
63
  },
64
  {
65
- "epoch": 0.51,
66
- "learning_rate": 4.08318626618038e-05,
67
- "loss": 0.9881,
68
  "step": 50
69
  },
70
  {
71
- "epoch": 0.56,
72
- "learning_rate": 5.11992955438076e-05,
73
- "loss": 0.8541,
74
  "step": 55
75
  },
76
  {
77
- "epoch": 0.61,
78
- "learning_rate": 6.201282042273309e-05,
79
- "loss": 0.9165,
80
  "step": 60
81
  },
82
  {
83
- "epoch": 0.66,
84
- "learning_rate": 7.299521709067675e-05,
85
- "loss": 1.1571,
86
  "step": 65
87
  },
88
  {
89
- "epoch": 0.71,
90
- "learning_rate": 8.386493606940326e-05,
91
- "loss": 0.9756,
92
  "step": 70
93
  },
94
  {
95
- "epoch": 0.77,
96
- "learning_rate": 9.434331653472495e-05,
97
- "loss": 1.0831,
98
  "step": 75
99
  },
100
  {
101
- "epoch": 0.82,
102
- "learning_rate": 0.00010416173018610171,
103
- "loss": 1.0254,
104
  "step": 80
105
  },
106
  {
107
- "epoch": 0.87,
108
- "learning_rate": 0.00011306846791811384,
109
- "loss": 0.9955,
110
  "step": 85
111
  },
112
  {
113
- "epoch": 0.92,
114
- "learning_rate": 0.00012083519274412272,
115
- "loss": 0.9694,
116
  "step": 90
117
  },
118
  {
119
- "epoch": 0.97,
120
- "learning_rate": 0.0001272627935421667,
121
- "loss": 1.0328,
122
  "step": 95
123
  },
124
  {
125
- "epoch": 1.0,
126
- "eval_loss": 0.9301878213882446,
127
- "eval_runtime": 7.0442,
128
- "eval_samples_per_second": 20.726,
129
- "eval_steps_per_second": 2.697,
130
- "step": 98
131
  },
132
  {
133
- "epoch": 1.02,
134
- "learning_rate": 0.00013218648955393698,
135
- "loss": 0.8862,
136
- "step": 100
 
 
137
  },
138
  {
139
- "epoch": 1.07,
140
- "learning_rate": 0.00013548005477567298,
141
- "loss": 0.9512,
142
  "step": 105
143
  },
144
  {
145
- "epoch": 1.12,
146
- "learning_rate": 0.00013705905394267309,
147
- "loss": 1.0269,
148
  "step": 110
149
  },
150
  {
151
- "epoch": 1.17,
152
- "learning_rate": 0.000136883007148315,
153
- "loss": 1.0087,
154
  "step": 115
155
  },
156
  {
157
- "epoch": 1.22,
158
- "learning_rate": 0.00013495642760447747,
159
- "loss": 0.999,
160
  "step": 120
161
  },
162
  {
163
- "epoch": 1.28,
164
- "learning_rate": 0.00013132870593888493,
165
- "loss": 1.1179,
166
  "step": 125
167
  },
168
  {
169
- "epoch": 1.33,
170
- "learning_rate": 0.00012609284399558039,
171
- "loss": 0.9949,
172
  "step": 130
173
  },
174
  {
175
- "epoch": 1.38,
176
- "learning_rate": 0.0001193830705993666,
177
- "loss": 0.9807,
178
  "step": 135
179
  },
180
  {
181
- "epoch": 1.43,
182
- "learning_rate": 0.00011137140040750923,
183
- "loss": 1.0501,
184
  "step": 140
185
  },
186
  {
187
- "epoch": 1.48,
188
- "learning_rate": 0.00010226322406747004,
189
- "loss": 0.9548,
190
  "step": 145
191
  },
192
  {
193
- "epoch": 1.53,
194
- "learning_rate": 9.229204273330182e-05,
195
- "loss": 1.0041,
196
  "step": 150
197
  },
198
  {
199
- "epoch": 1.58,
200
- "learning_rate": 8.171348192891448e-05,
201
- "loss": 0.9339,
202
  "step": 155
203
  },
204
  {
205
- "epoch": 1.63,
206
- "learning_rate": 7.079873822141611e-05,
207
- "loss": 0.9923,
208
  "step": 160
209
  },
210
  {
211
- "epoch": 1.68,
212
- "learning_rate": 5.982762670844271e-05,
213
- "loss": 1.009,
214
  "step": 165
215
  },
216
  {
217
- "epoch": 1.73,
218
- "learning_rate": 4.908140755711122e-05,
219
- "loss": 0.9595,
220
  "step": 170
221
  },
222
  {
223
- "epoch": 1.79,
224
- "learning_rate": 3.883557549653576e-05,
225
- "loss": 0.9652,
226
  "step": 175
227
  },
228
  {
229
- "epoch": 1.84,
230
- "learning_rate": 2.935279711561958e-05,
231
- "loss": 1.0619,
232
  "step": 180
233
  },
234
  {
235
- "epoch": 1.89,
236
- "learning_rate": 2.087617702860066e-05,
237
- "loss": 0.9697,
238
  "step": 185
239
  },
240
  {
241
- "epoch": 1.94,
242
- "learning_rate": 1.3623025539858162e-05,
243
- "loss": 1.0304,
244
  "step": 190
245
  },
246
  {
247
- "epoch": 1.99,
248
- "learning_rate": 7.779287582812291e-06,
249
- "loss": 0.9482,
250
  "step": 195
251
  },
252
  {
253
- "epoch": 2.0,
254
- "eval_loss": 0.9282976984977722,
255
- "eval_runtime": 7.0544,
256
- "eval_samples_per_second": 20.696,
257
- "eval_steps_per_second": 2.693,
258
- "step": 196
259
- },
260
- {
261
- "epoch": 2.06,
262
- "learning_rate": 2.8937315607040204e-06,
263
- "loss": 1.0557,
264
  "step": 200
265
  },
266
  {
267
- "epoch": 2.11,
268
- "learning_rate": 6.931910561541759e-06,
269
- "loss": 0.9196,
270
- "step": 205
271
- },
272
- {
273
- "epoch": 2.16,
274
- "learning_rate": 1.2583732714275138e-05,
275
- "loss": 0.9577,
276
- "step": 210
277
- },
278
- {
279
- "epoch": 2.22,
280
- "learning_rate": 1.9701309148087243e-05,
281
- "loss": 1.0242,
282
- "step": 215
283
- },
284
- {
285
- "epoch": 2.27,
286
- "learning_rate": 2.8098397211793345e-05,
287
- "loss": 0.9276,
288
- "step": 220
289
- },
290
- {
291
- "epoch": 2.32,
292
- "learning_rate": 3.755527380754418e-05,
293
- "loss": 0.815,
294
- "step": 225
295
- },
296
- {
297
- "epoch": 2.37,
298
- "learning_rate": 4.782448479340004e-05,
299
- "loss": 0.9471,
300
- "step": 230
301
- },
302
- {
303
- "epoch": 2.42,
304
- "learning_rate": 5.863732001255875e-05,
305
- "loss": 0.937,
306
- "step": 235
307
- },
308
- {
309
- "epoch": 2.47,
310
- "learning_rate": 6.97108445198519e-05,
311
- "loss": 0.9469,
312
- "step": 240
313
- },
314
- {
315
- "epoch": 2.53,
316
- "learning_rate": 8.075530202233491e-05,
317
- "loss": 0.9792,
318
- "step": 245
319
- },
320
- {
321
- "epoch": 2.58,
322
- "learning_rate": 9.148169681124647e-05,
323
- "loss": 0.9752,
324
- "step": 250
325
- },
326
- {
327
- "epoch": 2.63,
328
- "learning_rate": 0.00010160935579206932,
329
- "loss": 1.0419,
330
- "step": 255
331
- },
332
- {
333
- "epoch": 2.68,
334
- "learning_rate": 0.00011087327274022367,
335
- "loss": 1.0001,
336
- "step": 260
337
- },
338
- {
339
- "epoch": 2.73,
340
- "learning_rate": 0.00011903104260831395,
341
- "loss": 0.9188,
342
- "step": 265
343
- },
344
- {
345
- "epoch": 2.78,
346
- "learning_rate": 0.00012586920443780796,
347
- "loss": 1.0406,
348
- "step": 270
349
- },
350
- {
351
- "epoch": 2.84,
352
- "learning_rate": 0.0001312088269028256,
353
- "loss": 0.9707,
354
- "step": 275
355
- },
356
- {
357
- "epoch": 2.89,
358
- "learning_rate": 0.0001349101903314451,
359
- "loss": 1.0249,
360
- "step": 280
361
- },
362
- {
363
- "epoch": 2.94,
364
- "learning_rate": 0.0001368764426920297,
365
- "loss": 1.0981,
366
- "step": 285
367
- },
368
- {
369
- "epoch": 2.99,
370
- "learning_rate": 0.00013705613387990704,
371
- "loss": 1.0821,
372
- "step": 290
373
- },
374
- {
375
- "epoch": 3.0,
376
- "eval_loss": 0.8179315328598022,
377
- "eval_runtime": 7.3188,
378
- "eval_samples_per_second": 21.178,
379
- "eval_steps_per_second": 2.733,
380
- "step": 291
381
  }
382
  ],
383
- "max_steps": 291,
384
- "num_train_epochs": 3,
385
- "total_flos": 302053588992000.0,
386
  "trial_name": null,
387
  "trial_params": null
388
  }
 
1
  {
2
+ "best_metric": 0.8691701889038086,
3
+ "best_model_checkpoint": "output/morgenshtern/checkpoint-202",
4
+ "epoch": 2.0,
5
+ "global_step": 202,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
+ "learning_rate": 4.8622670741210196e-05,
13
+ "loss": 0.9101,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.1,
18
+ "learning_rate": 5.902924774844624e-05,
19
+ "loss": 0.8731,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.15,
24
+ "learning_rate": 6.96668542977361e-05,
25
+ "loss": 0.9581,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.2,
30
+ "learning_rate": 8.027870792255626e-05,
31
+ "loss": 0.8474,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 0.25,
36
+ "learning_rate": 9.060864780933411e-05,
37
+ "loss": 0.9838,
38
  "step": 25
39
  },
40
  {
41
+ "epoch": 0.3,
42
+ "learning_rate": 0.00010040731829419309,
43
+ "loss": 1.012,
44
  "step": 30
45
  },
46
  {
47
+ "epoch": 0.35,
48
+ "learning_rate": 0.00010943818808937948,
49
+ "loss": 0.8702,
50
  "step": 35
51
  },
52
  {
53
+ "epoch": 0.4,
54
+ "learning_rate": 0.00011748325994051893,
55
+ "loss": 0.9656,
56
  "step": 40
57
  },
58
  {
59
+ "epoch": 0.45,
60
+ "learning_rate": 0.00012434833288861775,
61
+ "loss": 0.9745,
62
  "step": 45
63
  },
64
  {
65
+ "epoch": 0.5,
66
+ "learning_rate": 0.0001298676901104138,
67
+ "loss": 0.8875,
68
  "step": 50
69
  },
70
  {
71
+ "epoch": 0.54,
72
+ "learning_rate": 0.00013390809917674526,
73
+ "loss": 0.9281,
74
  "step": 55
75
  },
76
  {
77
+ "epoch": 0.59,
78
+ "learning_rate": 0.00013637202816621594,
79
+ "loss": 0.932,
80
  "step": 60
81
  },
82
  {
83
+ "epoch": 0.64,
84
+ "learning_rate": 0.0001372,
85
+ "loss": 1.071,
86
  "step": 65
87
  },
88
  {
89
+ "epoch": 0.69,
90
+ "learning_rate": 0.000136372028166216,
91
+ "loss": 0.8787,
92
  "step": 70
93
  },
94
  {
95
+ "epoch": 0.74,
96
+ "learning_rate": 0.00013390809917674537,
97
+ "loss": 0.9074,
98
  "step": 75
99
  },
100
  {
101
+ "epoch": 0.79,
102
+ "learning_rate": 0.00012986769011041397,
103
+ "loss": 1.0496,
104
  "step": 80
105
  },
106
  {
107
+ "epoch": 0.84,
108
+ "learning_rate": 0.00012434833288861794,
109
+ "loss": 0.9943,
110
  "step": 85
111
  },
112
  {
113
+ "epoch": 0.89,
114
+ "learning_rate": 0.00011748325994051916,
115
+ "loss": 1.0026,
116
  "step": 90
117
  },
118
  {
119
+ "epoch": 0.94,
120
+ "learning_rate": 0.00010943818808937974,
121
+ "loss": 1.0201,
122
  "step": 95
123
  },
124
  {
125
+ "epoch": 0.99,
126
+ "learning_rate": 0.00010040731829419337,
127
+ "loss": 0.9215,
128
+ "step": 100
 
 
129
  },
130
  {
131
+ "epoch": 1.0,
132
+ "eval_loss": 0.8902494311332703,
133
+ "eval_runtime": 5.4222,
134
+ "eval_samples_per_second": 22.684,
135
+ "eval_steps_per_second": 2.951,
136
+ "step": 101
137
  },
138
  {
139
+ "epoch": 1.04,
140
+ "learning_rate": 9.060864780933398e-05,
141
+ "loss": 0.9922,
142
  "step": 105
143
  },
144
  {
145
+ "epoch": 1.09,
146
+ "learning_rate": 8.027870792255707e-05,
147
+ "loss": 0.8911,
148
  "step": 110
149
  },
150
  {
151
+ "epoch": 1.14,
152
+ "learning_rate": 6.966685429773643e-05,
153
+ "loss": 0.9184,
154
  "step": 115
155
  },
156
  {
157
+ "epoch": 1.19,
158
+ "learning_rate": 5.902924774844707e-05,
159
+ "loss": 0.8339,
160
  "step": 120
161
  },
162
  {
163
+ "epoch": 1.24,
164
+ "learning_rate": 4.862267074121052e-05,
165
+ "loss": 0.791,
166
  "step": 125
167
  },
168
  {
169
+ "epoch": 1.29,
170
+ "learning_rate": 3.869832889258939e-05,
171
+ "loss": 0.8984,
172
  "step": 130
173
  },
174
  {
175
+ "epoch": 1.34,
176
+ "learning_rate": 2.9495787086535028e-05,
177
+ "loss": 0.7645,
178
  "step": 135
179
  },
180
  {
181
+ "epoch": 1.39,
182
+ "learning_rate": 2.1237186588777798e-05,
183
+ "loss": 0.8448,
184
  "step": 140
185
  },
186
  {
187
+ "epoch": 1.44,
188
+ "learning_rate": 1.4121882752050083e-05,
189
+ "loss": 0.8783,
190
  "step": 145
191
  },
192
  {
193
+ "epoch": 1.49,
194
+ "learning_rate": 8.321632753190209e-06,
195
+ "loss": 0.8214,
196
  "step": 150
197
  },
198
  {
199
+ "epoch": 1.53,
200
+ "learning_rate": 3.976449525958718e-06,
201
+ "loss": 0.7646,
202
  "step": 155
203
  },
204
  {
205
+ "epoch": 1.58,
206
+ "learning_rate": 1.1912219719526667e-06,
207
+ "loss": 0.8915,
208
  "step": 160
209
  },
210
  {
211
+ "epoch": 1.63,
212
+ "learning_rate": 3.318303476960258e-08,
213
+ "loss": 0.7759,
214
  "step": 165
215
  },
216
  {
217
+ "epoch": 1.68,
218
+ "learning_rate": 5.302867558791814e-07,
219
+ "loss": 0.9129,
220
  "step": 170
221
  },
222
  {
223
+ "epoch": 1.73,
224
+ "learning_rate": 2.670533488647443e-06,
225
+ "loss": 0.8839,
226
  "step": 175
227
  },
228
  {
229
+ "epoch": 1.78,
230
+ "learning_rate": 6.402259559252401e-06,
231
+ "loss": 0.9542,
232
  "step": 180
233
  },
234
  {
235
+ "epoch": 1.83,
236
+ "learning_rate": 1.1635384382334973e-05,
237
+ "loss": 0.891,
238
  "step": 185
239
  },
240
  {
241
+ "epoch": 1.88,
242
+ "learning_rate": 1.824358492710126e-05,
243
+ "loss": 0.7246,
244
  "step": 190
245
  },
246
  {
247
+ "epoch": 1.93,
248
+ "learning_rate": 2.6067345044190458e-05,
249
+ "loss": 0.7276,
250
  "step": 195
251
  },
252
  {
253
+ "epoch": 1.98,
254
+ "learning_rate": 3.491780604522984e-05,
255
+ "loss": 0.8327,
 
 
 
 
 
 
 
 
256
  "step": 200
257
  },
258
  {
259
+ "epoch": 2.0,
260
+ "eval_loss": 0.8691701889038086,
261
+ "eval_runtime": 5.4146,
262
+ "eval_samples_per_second": 22.717,
263
+ "eval_steps_per_second": 2.955,
264
+ "step": 202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  }
266
  ],
267
+ "max_steps": 404,
268
+ "num_train_epochs": 4,
269
+ "total_flos": 209817501696000.0,
270
  "trial_name": null,
271
  "trial_params": null
272
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96ee95e4c456b61288f96b2abd17f7901d4c06694edf41fdc6b197ab27bbeae6
3
  size 2863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3bfc9cf6ae7969e054c62bceff3f5cf23d65ecf03aee4de9a1c7b331576fd6
3
  size 2863