vizsatiz commited on
Commit
1bb72a1
1 Parent(s): cb7d0a4

Training in progress, step 10, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d74923e6750ce3af92d86f86fc7f6d30ad9efa7c11c394f23c066b2da5e4f526
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c92c54ae0107bcb9111ebd51df85637e4bac183dad2512154a7f3eb82998e26
3
  size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:390ee3b40845be964b720275542a4edda04350734e4f83d7c0297bb0c7a64411
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38796f19c3dcb30ddbd8dec0ee8f8fcaa700cf01ae7942c175d63f3377efaccd
3
  size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ff490a02658f8f152d0233e76caa37a0ff1931dcd33092964ff663f60902657
3
  size 3137572976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20bc8935001bab15a6d9eb845f9e2af4c58b17b8509bd446fc0c990aff6379e
3
  size 3137572976
last-checkpoint/trainer_state.json CHANGED
@@ -10,116 +10,116 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 19.29960060119629,
14
  "learning_rate": 1e-06,
15
  "loss": 1.1744,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 2.0,
20
- "grad_norm": 19.299541473388672,
21
  "learning_rate": 2e-06,
22
  "loss": 1.1744,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 2.0,
27
- "eval_loss": 1.2005395889282227,
28
- "eval_runtime": 10.0466,
29
- "eval_samples_per_second": 0.199,
30
  "eval_steps_per_second": 0.1,
31
  "eval_wer": 100.0,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 3.0,
36
- "grad_norm": 14.894689559936523,
37
  "learning_rate": 1.9238795325112867e-06,
38
- "loss": 1.1152,
39
  "step": 3
40
  },
41
  {
42
  "epoch": 4.0,
43
- "grad_norm": 7.684017181396484,
44
  "learning_rate": 1.7071067811865474e-06,
45
  "loss": 0.9985,
46
  "step": 4
47
  },
48
  {
49
  "epoch": 4.0,
50
- "eval_loss": 0.9411194920539856,
51
- "eval_runtime": 9.7666,
52
- "eval_samples_per_second": 0.205,
53
  "eval_steps_per_second": 0.102,
54
  "eval_wer": 100.0,
55
  "step": 4
56
  },
57
  {
58
  "epoch": 5.0,
59
- "grad_norm": 6.6712212562561035,
60
  "learning_rate": 1.3826834323650898e-06,
61
- "loss": 0.8863,
62
  "step": 5
63
  },
64
  {
65
  "epoch": 6.0,
66
- "grad_norm": 4.722819805145264,
67
  "learning_rate": 1e-06,
68
  "loss": 0.8454,
69
  "step": 6
70
  },
71
  {
72
  "epoch": 6.0,
73
- "eval_loss": 0.8363609910011292,
74
- "eval_runtime": 9.7724,
75
- "eval_samples_per_second": 0.205,
76
  "eval_steps_per_second": 0.102,
77
  "eval_wer": 100.0,
78
  "step": 6
79
  },
80
  {
81
  "epoch": 7.0,
82
- "grad_norm": 5.701328277587891,
83
  "learning_rate": 6.173165676349102e-07,
84
  "loss": 0.7801,
85
  "step": 7
86
  },
87
  {
88
  "epoch": 8.0,
89
- "grad_norm": 6.357309818267822,
90
  "learning_rate": 2.9289321881345254e-07,
91
  "loss": 0.7518,
92
  "step": 8
93
  },
94
  {
95
  "epoch": 8.0,
96
- "eval_loss": 0.7893769145011902,
97
- "eval_runtime": 10.0532,
98
- "eval_samples_per_second": 0.199,
99
  "eval_steps_per_second": 0.099,
100
  "eval_wer": 100.0,
101
  "step": 8
102
  },
103
  {
104
  "epoch": 9.0,
105
- "grad_norm": 5.225600242614746,
106
  "learning_rate": 7.612046748871326e-08,
107
- "loss": 0.7348,
108
  "step": 9
109
  },
110
  {
111
  "epoch": 10.0,
112
- "grad_norm": 4.553382873535156,
113
  "learning_rate": 0.0,
114
- "loss": 0.7247,
115
  "step": 10
116
  },
117
  {
118
  "epoch": 10.0,
119
- "eval_loss": 0.7804375290870667,
120
- "eval_runtime": 10.2291,
121
- "eval_samples_per_second": 0.196,
122
- "eval_steps_per_second": 0.098,
123
  "eval_wer": 100.0,
124
  "step": 10
125
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 19.299592971801758,
14
  "learning_rate": 1e-06,
15
  "loss": 1.1744,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 2.0,
20
+ "grad_norm": 19.29966163635254,
21
  "learning_rate": 2e-06,
22
  "loss": 1.1744,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "eval_loss": 1.200242280960083,
28
+ "eval_runtime": 10.0049,
29
+ "eval_samples_per_second": 0.2,
30
  "eval_steps_per_second": 0.1,
31
  "eval_wer": 100.0,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 3.0,
36
+ "grad_norm": 14.89444637298584,
37
  "learning_rate": 1.9238795325112867e-06,
38
+ "loss": 1.1151,
39
  "step": 3
40
  },
41
  {
42
  "epoch": 4.0,
43
+ "grad_norm": 7.6878342628479,
44
  "learning_rate": 1.7071067811865474e-06,
45
  "loss": 0.9985,
46
  "step": 4
47
  },
48
  {
49
  "epoch": 4.0,
50
+ "eval_loss": 0.9411421418190002,
51
+ "eval_runtime": 9.8321,
52
+ "eval_samples_per_second": 0.203,
53
  "eval_steps_per_second": 0.102,
54
  "eval_wer": 100.0,
55
  "step": 4
56
  },
57
  {
58
  "epoch": 5.0,
59
+ "grad_norm": 6.6659393310546875,
60
  "learning_rate": 1.3826834323650898e-06,
61
+ "loss": 0.8864,
62
  "step": 5
63
  },
64
  {
65
  "epoch": 6.0,
66
+ "grad_norm": 4.723628520965576,
67
  "learning_rate": 1e-06,
68
  "loss": 0.8454,
69
  "step": 6
70
  },
71
  {
72
  "epoch": 6.0,
73
+ "eval_loss": 0.8363456130027771,
74
+ "eval_runtime": 9.8335,
75
+ "eval_samples_per_second": 0.203,
76
  "eval_steps_per_second": 0.102,
77
  "eval_wer": 100.0,
78
  "step": 6
79
  },
80
  {
81
  "epoch": 7.0,
82
+ "grad_norm": 5.706678867340088,
83
  "learning_rate": 6.173165676349102e-07,
84
  "loss": 0.7801,
85
  "step": 7
86
  },
87
  {
88
  "epoch": 8.0,
89
+ "grad_norm": 6.359007835388184,
90
  "learning_rate": 2.9289321881345254e-07,
91
  "loss": 0.7518,
92
  "step": 8
93
  },
94
  {
95
  "epoch": 8.0,
96
+ "eval_loss": 0.789289653301239,
97
+ "eval_runtime": 10.1491,
98
+ "eval_samples_per_second": 0.197,
99
  "eval_steps_per_second": 0.099,
100
  "eval_wer": 100.0,
101
  "step": 8
102
  },
103
  {
104
  "epoch": 9.0,
105
+ "grad_norm": 5.221818447113037,
106
  "learning_rate": 7.612046748871326e-08,
107
+ "loss": 0.7349,
108
  "step": 9
109
  },
110
  {
111
  "epoch": 10.0,
112
+ "grad_norm": 4.555125713348389,
113
  "learning_rate": 0.0,
114
+ "loss": 0.7248,
115
  "step": 10
116
  },
117
  {
118
  "epoch": 10.0,
119
+ "eval_loss": 0.7805328369140625,
120
+ "eval_runtime": 10.2645,
121
+ "eval_samples_per_second": 0.195,
122
+ "eval_steps_per_second": 0.097,
123
  "eval_wer": 100.0,
124
  "step": 10
125
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed3711bfc6e963e234e8255052fc7b89f06362b5707acbd0b1bc25bfaf55c473
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e628e2347ada9e4d132d910641cbe455b95ce46d1ef2ee82341be6b19b7b98
3
  size 5432