ihanif committed
Commit 00f5110
1 Parent(s): c13c4f6

End of training

all_results.json CHANGED
@@ -1,15 +1,15 @@
  {
- "epoch": 6.33,
- "eval_cer": 0.9545736113978939,
- "eval_loss": 4.904029369354248,
- "eval_runtime": 53.4856,
+ "epoch": 12.66,
+ "eval_cer": 0.9608244326394233,
+ "eval_loss": 4.192136287689209,
+ "eval_runtime": 52.7328,
  "eval_samples": 481,
- "eval_samples_per_second": 8.993,
- "eval_steps_per_second": 1.14,
- "eval_wer": 0.9322987721691678,
- "train_loss": 6.457582977294922,
- "train_runtime": 2324.152,
+ "eval_samples_per_second": 9.121,
+ "eval_steps_per_second": 1.157,
+ "eval_wer": 0.9294849931787176,
+ "train_loss": 1.9340453338623047,
+ "train_runtime": 2875.5301,
  "train_samples": 2528,
- "train_samples_per_second": 6.884,
- "train_steps_per_second": 0.215
+ "train_samples_per_second": 11.128,
+ "train_steps_per_second": 0.348
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
  {
- "epoch": 6.33,
- "eval_cer": 0.9545736113978939,
- "eval_loss": 4.904029369354248,
- "eval_runtime": 53.4856,
+ "epoch": 12.66,
+ "eval_cer": 0.9608244326394233,
+ "eval_loss": 4.192136287689209,
+ "eval_runtime": 52.7328,
  "eval_samples": 481,
- "eval_samples_per_second": 8.993,
- "eval_steps_per_second": 1.14,
- "eval_wer": 0.9322987721691678
+ "eval_samples_per_second": 9.121,
+ "eval_steps_per_second": 1.157,
+ "eval_wer": 0.9294849931787176
  }
runs/Dec18_17-32-59_129-146-179-188/events.out.tfevents.1671388061.129-146-179-188.141056.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1daf1563a93e258ca38036d9bbede0dd1eb72b9269b014b7ca7ddc67dd8eeb64
+ size 405
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 6.33,
- "train_loss": 6.457582977294922,
- "train_runtime": 2324.152,
+ "epoch": 12.66,
+ "train_loss": 1.9340453338623047,
+ "train_runtime": 2875.5301,
  "train_samples": 2528,
- "train_samples_per_second": 6.884,
- "train_steps_per_second": 0.215
+ "train_samples_per_second": 11.128,
+ "train_steps_per_second": 0.348
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
- "best_metric": 4.904029369354248,
- "best_model_checkpoint": "./checkpoint-500",
- "epoch": 6.329113924050633,
- "global_step": 500,
+ "best_metric": 4.192136287689209,
+ "best_model_checkpoint": "./checkpoint-1000",
+ "epoch": 12.658227848101266,
+ "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -358,18 +358,328 @@
  "step": 500
  },
  {
- "epoch": 6.33,
- "step": 500,
- "total_flos": 1.7609861732211995e+19,
- "train_loss": 6.457582977294922,
- "train_runtime": 2324.152,
- "train_samples_per_second": 6.884,
- "train_steps_per_second": 0.215
+ "epoch": 6.46,
362
+ "learning_rate": 3.8025000000000003e-07,
363
+ "loss": 4.3992,
364
+ "step": 510
365
+ },
366
+ {
367
+ "epoch": 6.58,
368
+ "learning_rate": 3.8775e-07,
369
+ "loss": 4.1942,
370
+ "step": 520
371
+ },
372
+ {
373
+ "epoch": 6.71,
374
+ "learning_rate": 3.9525000000000005e-07,
375
+ "loss": 4.339,
376
+ "step": 530
377
+ },
378
+ {
379
+ "epoch": 6.84,
380
+ "learning_rate": 4.0275000000000003e-07,
381
+ "loss": 4.2826,
382
+ "step": 540
383
+ },
384
+ {
385
+ "epoch": 6.96,
386
+ "learning_rate": 4.1025000000000006e-07,
387
+ "loss": 4.3752,
388
+ "step": 550
389
+ },
390
+ {
391
+ "epoch": 7.09,
392
+ "learning_rate": 4.1775000000000004e-07,
393
+ "loss": 3.9576,
394
+ "step": 560
395
+ },
396
+ {
397
+ "epoch": 7.22,
398
+ "learning_rate": 4.2524999999999997e-07,
399
+ "loss": 4.0814,
400
+ "step": 570
401
+ },
402
+ {
403
+ "epoch": 7.34,
404
+ "learning_rate": 4.3274999999999995e-07,
405
+ "loss": 4.3274,
406
+ "step": 580
407
+ },
408
+ {
409
+ "epoch": 7.47,
410
+ "learning_rate": 4.4025e-07,
411
+ "loss": 3.9749,
412
+ "step": 590
413
+ },
414
+ {
415
+ "epoch": 7.59,
416
+ "learning_rate": 4.4774999999999997e-07,
417
+ "loss": 3.8373,
418
+ "step": 600
419
+ },
420
+ {
421
+ "epoch": 7.72,
422
+ "learning_rate": 4.5525e-07,
423
+ "loss": 4.1076,
424
+ "step": 610
425
+ },
426
+ {
427
+ "epoch": 7.85,
428
+ "learning_rate": 4.6275e-07,
429
+ "loss": 3.8279,
430
+ "step": 620
431
+ },
432
+ {
433
+ "epoch": 7.97,
434
+ "learning_rate": 4.7025e-07,
435
+ "loss": 4.0263,
436
+ "step": 630
437
+ },
438
+ {
439
+ "epoch": 8.1,
440
+ "learning_rate": 4.7775e-07,
441
+ "loss": 3.8141,
442
+ "step": 640
443
+ },
444
+ {
445
+ "epoch": 8.23,
446
+ "learning_rate": 4.8525e-07,
447
+ "loss": 3.8791,
448
+ "step": 650
449
+ },
450
+ {
451
+ "epoch": 8.35,
452
+ "learning_rate": 4.927500000000001e-07,
453
+ "loss": 3.6906,
454
+ "step": 660
455
+ },
456
+ {
457
+ "epoch": 8.48,
458
+ "learning_rate": 5.0025e-07,
459
+ "loss": 4.0741,
460
+ "step": 670
461
+ },
462
+ {
463
+ "epoch": 8.61,
464
+ "learning_rate": 5.0775e-07,
465
+ "loss": 3.7322,
466
+ "step": 680
467
+ },
468
+ {
469
+ "epoch": 8.73,
470
+ "learning_rate": 5.152500000000001e-07,
471
+ "loss": 4.1834,
472
+ "step": 690
473
+ },
474
+ {
475
+ "epoch": 8.86,
476
+ "learning_rate": 5.2275e-07,
477
+ "loss": 3.8571,
478
+ "step": 700
479
+ },
480
+ {
481
+ "epoch": 8.99,
482
+ "learning_rate": 5.3025e-07,
483
+ "loss": 3.9223,
484
+ "step": 710
485
+ },
486
+ {
487
+ "epoch": 9.11,
488
+ "learning_rate": 5.3775e-07,
489
+ "loss": 3.6997,
490
+ "step": 720
491
+ },
492
+ {
493
+ "epoch": 9.24,
494
+ "learning_rate": 5.4525e-07,
495
+ "loss": 4.0024,
496
+ "step": 730
497
+ },
498
+ {
499
+ "epoch": 9.37,
500
+ "learning_rate": 5.5275e-07,
501
+ "loss": 3.7653,
502
+ "step": 740
503
+ },
504
+ {
505
+ "epoch": 9.49,
506
+ "learning_rate": 5.602500000000001e-07,
507
+ "loss": 3.873,
508
+ "step": 750
509
+ },
+ {
+ "epoch": 9.62,
+ "learning_rate": 5.6775e-07,
+ "loss": 3.597,
+ "step": 760
+ },
+ {
+ "epoch": 9.75,
+ "learning_rate": 5.7525e-07,
+ "loss": 3.7902,
+ "step": 770
+ },
+ {
+ "epoch": 9.87,
+ "learning_rate": 5.827500000000001e-07,
+ "loss": 3.8314,
+ "step": 780
+ },
+ {
+ "epoch": 10.0,
+ "learning_rate": 5.902500000000001e-07,
+ "loss": 3.956,
+ "step": 790
+ },
+ {
+ "epoch": 10.13,
+ "learning_rate": 5.9775e-07,
+ "loss": 3.6297,
+ "step": 800
+ },
+ {
+ "epoch": 10.25,
+ "learning_rate": 6.0525e-07,
+ "loss": 4.0449,
+ "step": 810
+ },
+ {
+ "epoch": 10.38,
+ "learning_rate": 6.1275e-07,
+ "loss": 3.5669,
+ "step": 820
+ },
+ {
+ "epoch": 10.51,
+ "learning_rate": 6.2025e-07,
+ "loss": 3.6994,
+ "step": 830
+ },
+ {
+ "epoch": 10.63,
+ "learning_rate": 6.277499999999999e-07,
+ "loss": 3.6146,
+ "step": 840
+ },
+ {
+ "epoch": 10.76,
+ "learning_rate": 6.3525e-07,
+ "loss": 3.8637,
+ "step": 850
+ },
+ {
+ "epoch": 10.89,
+ "learning_rate": 6.4275e-07,
+ "loss": 3.7,
+ "step": 860
+ },
+ {
+ "epoch": 11.01,
+ "learning_rate": 6.5025e-07,
+ "loss": 3.8526,
+ "step": 870
+ },
+ {
+ "epoch": 11.14,
+ "learning_rate": 6.5775e-07,
+ "loss": 3.7276,
+ "step": 880
+ },
+ {
+ "epoch": 11.27,
+ "learning_rate": 6.6525e-07,
+ "loss": 3.989,
+ "step": 890
+ },
+ {
+ "epoch": 11.39,
+ "learning_rate": 6.7275e-07,
+ "loss": 3.7838,
+ "step": 900
+ },
+ {
+ "epoch": 11.52,
+ "learning_rate": 6.802500000000001e-07,
+ "loss": 3.698,
+ "step": 910
+ },
+ {
+ "epoch": 11.65,
+ "learning_rate": 6.8775e-07,
+ "loss": 3.4391,
+ "step": 920
+ },
+ {
+ "epoch": 11.77,
+ "learning_rate": 6.9525e-07,
+ "loss": 3.7371,
+ "step": 930
+ },
+ {
+ "epoch": 11.9,
+ "learning_rate": 7.027500000000001e-07,
+ "loss": 3.4201,
+ "step": 940
+ },
+ {
+ "epoch": 12.03,
+ "learning_rate": 7.1025e-07,
+ "loss": 3.7721,
+ "step": 950
+ },
+ {
+ "epoch": 12.15,
+ "learning_rate": 7.1775e-07,
+ "loss": 3.7842,
+ "step": 960
+ },
+ {
+ "epoch": 12.28,
+ "learning_rate": 7.2525e-07,
+ "loss": 3.8125,
+ "step": 970
+ },
+ {
+ "epoch": 12.41,
+ "learning_rate": 7.3275e-07,
+ "loss": 3.7673,
+ "step": 980
+ },
+ {
+ "epoch": 12.53,
+ "learning_rate": 7.395e-07,
+ "loss": 3.7048,
+ "step": 990
+ },
+ {
+ "epoch": 12.66,
+ "learning_rate": 7.47e-07,
+ "loss": 3.3986,
+ "step": 1000
+ },
+ {
+ "epoch": 12.66,
+ "eval_cer": 0.9608244326394233,
+ "eval_loss": 4.192136287689209,
+ "eval_runtime": 52.6187,
+ "eval_samples_per_second": 9.141,
+ "eval_steps_per_second": 1.159,
+ "eval_wer": 0.9294849931787176,
+ "step": 1000
+ },
+ {
+ "epoch": 12.66,
+ "step": 1000,
+ "total_flos": 3.5131981783950033e+19,
+ "train_loss": 1.9340453338623047,
+ "train_runtime": 2875.5301,
+ "train_samples_per_second": 11.128,
+ "train_steps_per_second": 0.348
  }
  ],
- "max_steps": 500,
- "num_train_epochs": 7,
- "total_flos": 1.7609861732211995e+19,
+ "max_steps": 1000,
+ "num_train_epochs": 13,
+ "total_flos": 3.5131981783950033e+19,
  "trial_name": null,
  "trial_params": null
  }