AlekseyKorshuk committed on
Commit
0c9afed
1 Parent(s): c618893

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/22rvzlbq/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/320wuvzx) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/320wuvzx/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/19ghjsta/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3the5qdy) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3the5qdy/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -35,7 +35,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
35
  }
36
  },
37
  "torch_dtype": "float32",
38
+ "transformers_version": "4.11.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 0.9282976984977722, "eval_runtime": 7.0681, "eval_samples_per_second": 20.656, "eval_steps_per_second": 2.688, "epoch": 3.0}
 
1
+ {"eval_loss": 0.8179315328598022, "eval_runtime": 7.3257, "eval_samples_per_second": 21.158, "eval_steps_per_second": 2.73, "epoch": 3.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:290b054aa26a75472ea89462728e1fbe01bb1cbbb7bf90d372ad250db6831111
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bfe735b97d1edf92f1881ff486f7bbdb5e1c909ebc2fe3797e20c998d462eef
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfb92c5db45ae57ec8704270fd02cd24309911146a62e4ffdfe187cdfacc2cca
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c09c40bf4dfd5fd306d702098d94d3bbdc47999df4e071987a459220d393d0d
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75dfc06c83e4ec2c68737881be276359157fac03863eda5033a21d9e80f92ef9
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e79396262fb67544ae8b77d0955613d7d836ace7dac0bfc78d519268a6d4270c
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffc74e881d44215266a1c99f799520e15420c54f0cc889ce0ad363a9b0849b03
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6fe78f0f512530d4eb0b75148f257776f4270926c8eeb3605dc42ed450ad9f
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9d873c8cacf8459303fcc5fcff12d277bec46420342ef76b13541e314fd254
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:592ef97b2b907edd507cff14ca8b3cd3c3eaddc58fa33aedd4b1cf49372e69c1
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.9282976984977722,
3
- "best_model_checkpoint": "output/morgenshtern/checkpoint-196",
4
- "epoch": 2.0,
5
- "global_step": 196,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -256,11 +256,133 @@
256
  "eval_samples_per_second": 20.696,
257
  "eval_steps_per_second": 2.693,
258
  "step": 196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  }
260
  ],
261
- "max_steps": 294,
262
  "num_train_epochs": 3,
263
- "total_flos": 203546492928000.0,
264
  "trial_name": null,
265
  "trial_params": null
266
  }
 
1
  {
2
+ "best_metric": 0.8179315328598022,
3
+ "best_model_checkpoint": "output/morgenshtern/checkpoint-291",
4
+ "epoch": 3.0,
5
+ "global_step": 291,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
256
  "eval_samples_per_second": 20.696,
257
  "eval_steps_per_second": 2.693,
258
  "step": 196
259
+ },
260
+ {
261
+ "epoch": 2.06,
262
+ "learning_rate": 2.8937315607040204e-06,
263
+ "loss": 1.0557,
264
+ "step": 200
265
+ },
266
+ {
267
+ "epoch": 2.11,
268
+ "learning_rate": 6.931910561541759e-06,
269
+ "loss": 0.9196,
270
+ "step": 205
271
+ },
272
+ {
273
+ "epoch": 2.16,
274
+ "learning_rate": 1.2583732714275138e-05,
275
+ "loss": 0.9577,
276
+ "step": 210
277
+ },
278
+ {
279
+ "epoch": 2.22,
280
+ "learning_rate": 1.9701309148087243e-05,
281
+ "loss": 1.0242,
282
+ "step": 215
283
+ },
284
+ {
285
+ "epoch": 2.27,
286
+ "learning_rate": 2.8098397211793345e-05,
287
+ "loss": 0.9276,
288
+ "step": 220
289
+ },
290
+ {
291
+ "epoch": 2.32,
292
+ "learning_rate": 3.755527380754418e-05,
293
+ "loss": 0.815,
294
+ "step": 225
295
+ },
296
+ {
297
+ "epoch": 2.37,
298
+ "learning_rate": 4.782448479340004e-05,
299
+ "loss": 0.9471,
300
+ "step": 230
301
+ },
302
+ {
303
+ "epoch": 2.42,
304
+ "learning_rate": 5.863732001255875e-05,
305
+ "loss": 0.937,
306
+ "step": 235
307
+ },
308
+ {
309
+ "epoch": 2.47,
310
+ "learning_rate": 6.97108445198519e-05,
311
+ "loss": 0.9469,
312
+ "step": 240
313
+ },
314
+ {
315
+ "epoch": 2.53,
316
+ "learning_rate": 8.075530202233491e-05,
317
+ "loss": 0.9792,
318
+ "step": 245
319
+ },
320
+ {
321
+ "epoch": 2.58,
322
+ "learning_rate": 9.148169681124647e-05,
323
+ "loss": 0.9752,
324
+ "step": 250
325
+ },
326
+ {
327
+ "epoch": 2.63,
328
+ "learning_rate": 0.00010160935579206932,
329
+ "loss": 1.0419,
330
+ "step": 255
331
+ },
332
+ {
333
+ "epoch": 2.68,
334
+ "learning_rate": 0.00011087327274022367,
335
+ "loss": 1.0001,
336
+ "step": 260
337
+ },
338
+ {
339
+ "epoch": 2.73,
340
+ "learning_rate": 0.00011903104260831395,
341
+ "loss": 0.9188,
342
+ "step": 265
343
+ },
344
+ {
345
+ "epoch": 2.78,
346
+ "learning_rate": 0.00012586920443780796,
347
+ "loss": 1.0406,
348
+ "step": 270
349
+ },
350
+ {
351
+ "epoch": 2.84,
352
+ "learning_rate": 0.0001312088269028256,
353
+ "loss": 0.9707,
354
+ "step": 275
355
+ },
356
+ {
357
+ "epoch": 2.89,
358
+ "learning_rate": 0.0001349101903314451,
359
+ "loss": 1.0249,
360
+ "step": 280
361
+ },
362
+ {
363
+ "epoch": 2.94,
364
+ "learning_rate": 0.0001368764426920297,
365
+ "loss": 1.0981,
366
+ "step": 285
367
+ },
368
+ {
369
+ "epoch": 2.99,
370
+ "learning_rate": 0.00013705613387990704,
371
+ "loss": 1.0821,
372
+ "step": 290
373
+ },
374
+ {
375
+ "epoch": 3.0,
376
+ "eval_loss": 0.8179315328598022,
377
+ "eval_runtime": 7.3188,
378
+ "eval_samples_per_second": 21.178,
379
+ "eval_steps_per_second": 2.733,
380
+ "step": 291
381
  }
382
  ],
383
+ "max_steps": 291,
384
  "num_train_epochs": 3,
385
+ "total_flos": 302053588992000.0,
386
  "trial_name": null,
387
  "trial_params": null
388
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:640d62725592ddeb2633cb42cfa4872adce692e463f0c31e16d57bedfc0f8439
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96ee95e4c456b61288f96b2abd17f7901d4c06694edf41fdc6b197ab27bbeae6
3
+ size 2863