farmery commited on
Commit
64442c8
·
verified ·
1 Parent(s): 78e709f

Training in progress, step 375, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f27b8b80d334d2066d5e3ba4a598ec87bfa2723f7cc021f8322176c38ae6a0d
3
  size 9823216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e3b73a39232a2414041333637ebab688f39c958d5d96f17b60e392e09f12fa
3
  size 9823216
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff060b07a09f9be73416a05f6fc0053d6fa781f8dd8a9f8e7524ea3e947116d1
3
- size 5962860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532dd76ad76481ae2b6558ea671c20f889a5e40aa56f9c9420f1a39db26cbdf6
3
+ size 5963308
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc3cd544df75518acc39b1bd0a83a4213ccee0385ffb5f2e3c177062df280884
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e54d171ed44fce507d7399ea3fb5487ff9bdb6c8ea364ed7fec19eb88c9504
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac8b5d17c74159060c343eae005d4ee310e607299893d1e6bd56368f9a4576f8
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f67ac88f6ab96777a4b6eac762dec0025e0025fbeec2ea84dfdf2e53f9d3f3
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:750a829250c1db0163207822acfc93fd30fed2b39c445bbcc2476956a5a31665
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b28ac22428360569c5d2db5ae111d45bce664ad777bace2bb77c4ee694928a37
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:173c953847af977989744eb101ba3739481decbf9d321ae03f9658e11078c996
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74f7952926915f7eed9aaef89269c11e94d5ae7c6014532f9b4d6ef6cdb48f1c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d90c730646140ec36d7749c40daa51b09c4e3a0b620d5c95eeda7764b46e3d79
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128e0b0294b5389dce5b958620f0aba512ba88459c3fb7de261ee4ac77eb7fa5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3394433129667346,
5
  "eval_steps": 125,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,189 @@
381
  "eval_samples_per_second": 160.71,
382
  "eval_steps_per_second": 20.202,
383
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 5,
@@ -400,7 +583,7 @@
400
  "attributes": {}
401
  }
402
  },
403
- "total_flos": 5341488498081792.0,
404
  "train_batch_size": 2,
405
  "trial_name": null,
406
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5091649694501018,
5
  "eval_steps": 125,
6
+ "global_step": 375,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 160.71,
382
  "eval_steps_per_second": 20.202,
383
  "step": 250
384
+ },
385
+ {
386
+ "epoch": 0.34623217922606925,
387
+ "grad_norm": 0.22694501280784607,
388
+ "learning_rate": 5e-05,
389
+ "loss": 1.4285,
390
+ "step": 255
391
+ },
392
+ {
393
+ "epoch": 0.3530210454854039,
394
+ "grad_norm": 0.23925504088401794,
395
+ "learning_rate": 4.839742112141724e-05,
396
+ "loss": 1.4588,
397
+ "step": 260
398
+ },
399
+ {
400
+ "epoch": 0.35980991174473864,
401
+ "grad_norm": 0.2735687494277954,
402
+ "learning_rate": 4.679648900096436e-05,
403
+ "loss": 1.4691,
404
+ "step": 265
405
+ },
406
+ {
407
+ "epoch": 0.3665987780040733,
408
+ "grad_norm": 0.29210373759269714,
409
+ "learning_rate": 4.5198848704615914e-05,
410
+ "loss": 1.441,
411
+ "step": 270
412
+ },
413
+ {
414
+ "epoch": 0.37338764426340804,
415
+ "grad_norm": 0.2414471060037613,
416
+ "learning_rate": 4.3606141915774693e-05,
417
+ "loss": 1.4356,
418
+ "step": 275
419
+ },
420
+ {
421
+ "epoch": 0.3801765105227427,
422
+ "grad_norm": 0.23838582634925842,
423
+ "learning_rate": 4.2020005248331054e-05,
424
+ "loss": 1.4412,
425
+ "step": 280
426
+ },
427
+ {
428
+ "epoch": 0.3869653767820774,
429
+ "grad_norm": 0.22352631390094757,
430
+ "learning_rate": 4.04420685649314e-05,
431
+ "loss": 1.459,
432
+ "step": 285
433
+ },
434
+ {
435
+ "epoch": 0.3937542430414121,
436
+ "grad_norm": 0.25490501523017883,
437
+ "learning_rate": 3.887395330218429e-05,
438
+ "loss": 1.4208,
439
+ "step": 290
440
+ },
441
+ {
442
+ "epoch": 0.40054310930074677,
443
+ "grad_norm": 0.26434803009033203,
444
+ "learning_rate": 3.731727080452464e-05,
445
+ "loss": 1.4264,
446
+ "step": 295
447
+ },
448
+ {
449
+ "epoch": 0.4073319755600815,
450
+ "grad_norm": 0.23617196083068848,
451
+ "learning_rate": 3.5773620668448384e-05,
452
+ "loss": 1.3935,
453
+ "step": 300
454
+ },
455
+ {
456
+ "epoch": 0.41412084181941616,
457
+ "grad_norm": 0.23081578314304352,
458
+ "learning_rate": 3.424458909881897e-05,
459
+ "loss": 1.4113,
460
+ "step": 305
461
+ },
462
+ {
463
+ "epoch": 0.42090970807875083,
464
+ "grad_norm": 0.24254988133907318,
465
+ "learning_rate": 3.273174727893463e-05,
466
+ "loss": 1.4908,
467
+ "step": 310
468
+ },
469
+ {
470
+ "epoch": 0.42769857433808556,
471
+ "grad_norm": 0.21009749174118042,
472
+ "learning_rate": 3.12366497560313e-05,
473
+ "loss": 1.4607,
474
+ "step": 315
475
+ },
476
+ {
477
+ "epoch": 0.4344874405974202,
478
+ "grad_norm": 0.2565441131591797,
479
+ "learning_rate": 2.976083284388031e-05,
480
+ "loss": 1.4664,
481
+ "step": 320
482
+ },
483
+ {
484
+ "epoch": 0.4412763068567549,
485
+ "grad_norm": 0.2448188215494156,
486
+ "learning_rate": 2.8305813044122097e-05,
487
+ "loss": 1.4369,
488
+ "step": 325
489
+ },
490
+ {
491
+ "epoch": 0.4480651731160896,
492
+ "grad_norm": 0.19102215766906738,
493
+ "learning_rate": 2.687308548795825e-05,
494
+ "loss": 1.3941,
495
+ "step": 330
496
+ },
497
+ {
498
+ "epoch": 0.4548540393754243,
499
+ "grad_norm": 0.23744595050811768,
500
+ "learning_rate": 2.5464122399803125e-05,
501
+ "loss": 1.4053,
502
+ "step": 335
503
+ },
504
+ {
505
+ "epoch": 0.461642905634759,
506
+ "grad_norm": 0.25465700030326843,
507
+ "learning_rate": 2.4080371584473748e-05,
508
+ "loss": 1.4054,
509
+ "step": 340
510
+ },
511
+ {
512
+ "epoch": 0.4684317718940937,
513
+ "grad_norm": 0.26104259490966797,
514
+ "learning_rate": 2.272325493947257e-05,
515
+ "loss": 1.4511,
516
+ "step": 345
517
+ },
518
+ {
519
+ "epoch": 0.47522063815342835,
520
+ "grad_norm": 0.22542956471443176,
521
+ "learning_rate": 2.139416699389153e-05,
522
+ "loss": 1.4673,
523
+ "step": 350
524
+ },
525
+ {
526
+ "epoch": 0.4820095044127631,
527
+ "grad_norm": 0.2605678141117096,
528
+ "learning_rate": 2.0094473475439202e-05,
529
+ "loss": 1.3932,
530
+ "step": 355
531
+ },
532
+ {
533
+ "epoch": 0.48879837067209775,
534
+ "grad_norm": 0.24848726391792297,
535
+ "learning_rate": 1.8825509907063327e-05,
536
+ "loss": 1.4204,
537
+ "step": 360
538
+ },
539
+ {
540
+ "epoch": 0.4955872369314325,
541
+ "grad_norm": 0.2906915843486786,
542
+ "learning_rate": 1.758858023461059e-05,
543
+ "loss": 1.3734,
544
+ "step": 365
545
+ },
546
+ {
547
+ "epoch": 0.5023761031907671,
548
+ "grad_norm": 0.25691258907318115,
549
+ "learning_rate": 1.6384955486934156e-05,
550
+ "loss": 1.4086,
551
+ "step": 370
552
+ },
553
+ {
554
+ "epoch": 0.5091649694501018,
555
+ "grad_norm": 0.2475077509880066,
556
+ "learning_rate": 1.5215872469825682e-05,
557
+ "loss": 1.4393,
558
+ "step": 375
559
+ },
560
+ {
561
+ "epoch": 0.5091649694501018,
562
+ "eval_loss": 1.4588358402252197,
563
+ "eval_runtime": 7.7286,
564
+ "eval_samples_per_second": 160.573,
565
+ "eval_steps_per_second": 20.185,
566
+ "step": 375
567
  }
568
  ],
569
  "logging_steps": 5,
 
583
  "attributes": {}
584
  }
585
  },
586
+ "total_flos": 8011565227835392.0,
587
  "train_batch_size": 2,
588
  "trial_name": null,
589
  "trial_params": null