juancopi81 commited on
Commit
048e4e0
·
1 Parent(s): 68d6d90

End of training

Browse files
all_results.json CHANGED
@@ -1,7 +1,12 @@
1
  {
2
  "epoch": 1.03,
3
- "train_loss": 0.13366424560546875,
4
- "train_runtime": 7967.969,
5
- "train_samples_per_second": 4.016,
6
- "train_steps_per_second": 0.063
 
 
 
 
 
7
  }
 
1
  {
2
  "epoch": 1.03,
3
+ "eval_loss": 0.1700439453125,
4
+ "eval_runtime": 3035.8435,
5
+ "eval_samples_per_second": 5.112,
6
+ "eval_steps_per_second": 0.16,
7
+ "eval_wer": 5.696169637752716,
8
+ "train_loss": 0.0865213623046875,
9
+ "train_runtime": 8011.6489,
10
+ "train_samples_per_second": 3.994,
11
+ "train_steps_per_second": 0.062
12
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.03,
3
+ "eval_loss": 0.1700439453125,
4
+ "eval_runtime": 3035.8435,
5
+ "eval_samples_per_second": 5.112,
6
+ "eval_steps_per_second": 0.16,
7
+ "eval_wer": 5.696169637752716
8
+ }
runs/Dec16_17-56-05_132-145-140-45/events.out.tfevents.1671224581.132-145-140-45.3273636.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0f139750e42de87e1b9e8d503d3b270351c8c4aa7e5d263d51bdf026118d574
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.03,
3
- "train_loss": 0.13366424560546875,
4
- "train_runtime": 7967.969,
5
- "train_samples_per_second": 4.016,
6
- "train_steps_per_second": 0.063
7
  }
 
1
  {
2
  "epoch": 1.03,
3
+ "train_loss": 0.0865213623046875,
4
+ "train_runtime": 8011.6489,
5
+ "train_samples_per_second": 3.994,
6
+ "train_steps_per_second": 0.062
7
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 5.735798555168651,
3
  "best_model_checkpoint": "./checkpoint-500",
4
  "epoch": 1.03,
5
  "global_step": 500,
@@ -9,141 +9,141 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
- "learning_rate": 6.611096473669595e-07,
13
- "loss": 0.6089,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.1,
18
- "learning_rate": 8.313789158407869e-07,
19
- "loss": 0.1002,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.15,
24
- "learning_rate": 9.256291743595375e-07,
25
- "loss": 0.0973,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.2,
30
- "learning_rate": 9.91135616519784e-07,
31
- "loss": 0.096,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.25,
36
- "learning_rate": 1e-06,
37
- "loss": 0.1061,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.3,
42
- "learning_rate": 1e-06,
43
- "loss": 0.0993,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.35,
48
- "learning_rate": 1e-06,
49
- "loss": 0.0994,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.4,
54
- "learning_rate": 1e-06,
55
- "loss": 0.0964,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.45,
60
- "learning_rate": 1e-06,
61
- "loss": 0.0959,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.5,
66
- "learning_rate": 1e-06,
67
- "loss": 0.1011,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 0.55,
72
- "learning_rate": 1e-06,
73
- "loss": 0.0976,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.6,
78
- "learning_rate": 1e-06,
79
- "loss": 0.0995,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 0.65,
84
- "learning_rate": 1e-06,
85
- "loss": 0.1059,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.7,
90
- "learning_rate": 1e-06,
91
- "loss": 0.1056,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.75,
96
- "learning_rate": 1e-06,
97
- "loss": 0.1078,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.8,
102
- "learning_rate": 1e-06,
103
- "loss": 0.1122,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 0.85,
108
- "learning_rate": 1e-06,
109
- "loss": 0.1098,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.9,
114
- "learning_rate": 1e-06,
115
- "loss": 0.1163,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 0.95,
120
- "learning_rate": 1e-06,
121
- "loss": 0.1209,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 1.03,
126
- "learning_rate": 1e-06,
127
- "loss": 0.1972,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 1.03,
132
- "eval_loss": 0.1683349609375,
133
- "eval_runtime": 3047.5288,
134
- "eval_samples_per_second": 5.093,
135
  "eval_steps_per_second": 0.159,
136
- "eval_wer": 5.735798555168651,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 1.03,
141
  "step": 500,
142
  "total_flos": 3.2659357379539763e+19,
143
- "train_loss": 0.13366424560546875,
144
- "train_runtime": 7967.969,
145
- "train_samples_per_second": 4.016,
146
- "train_steps_per_second": 0.063
147
  }
148
  ],
149
  "max_steps": 500,
 
1
  {
2
+ "best_metric": 5.696169637752716,
3
  "best_model_checkpoint": "./checkpoint-500",
4
  "epoch": 1.03,
5
  "global_step": 500,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.05,
12
+ "learning_rate": 3.4043195900439816e-07,
13
+ "loss": 0.0907,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.1,
18
+ "learning_rate": 4.203103093438967e-07,
19
+ "loss": 0.0802,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.15,
24
+ "learning_rate": 4.6583071503011386e-07,
25
+ "loss": 0.0774,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.2,
30
+ "learning_rate": 4.978065189231237e-07,
31
+ "loss": 0.0746,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.25,
36
+ "learning_rate": 5e-07,
37
+ "loss": 0.0834,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.3,
42
+ "learning_rate": 5e-07,
43
+ "loss": 0.0775,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.35,
48
+ "learning_rate": 5e-07,
49
+ "loss": 0.0784,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.4,
54
+ "learning_rate": 5e-07,
55
+ "loss": 0.0756,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.45,
60
+ "learning_rate": 5e-07,
61
+ "loss": 0.0744,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.5,
66
+ "learning_rate": 5e-07,
67
+ "loss": 0.0801,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 0.55,
72
+ "learning_rate": 5e-07,
73
+ "loss": 0.0768,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.6,
78
+ "learning_rate": 5e-07,
79
+ "loss": 0.0795,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 0.65,
84
+ "learning_rate": 5e-07,
85
+ "loss": 0.0837,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.7,
90
+ "learning_rate": 5e-07,
91
+ "loss": 0.0827,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.75,
96
+ "learning_rate": 5e-07,
97
+ "loss": 0.0858,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.8,
102
+ "learning_rate": 5e-07,
103
+ "loss": 0.088,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 0.85,
108
+ "learning_rate": 5e-07,
109
+ "loss": 0.0865,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.9,
114
+ "learning_rate": 5e-07,
115
+ "loss": 0.0911,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 0.95,
120
+ "learning_rate": 5e-07,
121
+ "loss": 0.0976,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 1.03,
126
+ "learning_rate": 5e-07,
127
+ "loss": 0.1665,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 1.03,
132
+ "eval_loss": 0.1700439453125,
133
+ "eval_runtime": 3050.6887,
134
+ "eval_samples_per_second": 5.087,
135
  "eval_steps_per_second": 0.159,
136
+ "eval_wer": 5.696169637752716,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 1.03,
141
  "step": 500,
142
  "total_flos": 3.2659357379539763e+19,
143
+ "train_loss": 0.0865213623046875,
144
+ "train_runtime": 8011.6489,
145
+ "train_samples_per_second": 3.994,
146
+ "train_steps_per_second": 0.062
147
  }
148
  ],
149
  "max_steps": 500,