mirikwa commited on
Commit
29e894b
1 Parent(s): 8c5a311

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +226 -0
trainer_state.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 17610,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 1.8e-05,
13
+ "loss": 0.0167,
14
+ "step": 1761
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_EG_f1": 0.9978260869565218,
19
+ "eval_ET_f1": 0.9987063389391979,
20
+ "eval_TE_f1": 0.9969821673525378,
21
+ "eval_loss": 0.005893823690712452,
22
+ "eval_overall_accuracy": 0.998933354402876,
23
+ "eval_overall_f1": 0.9976327358809605,
24
+ "eval_overall_precision": 0.9975202885482417,
25
+ "eval_overall_recall": 0.9977452085682075,
26
+ "eval_runtime": 59.0341,
27
+ "eval_samples_per_second": 53.037,
28
+ "eval_steps_per_second": 3.32,
29
+ "step": 1761
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "learning_rate": 1.6000000000000003e-05,
34
+ "loss": 0.0048,
35
+ "step": 3522
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_EG_f1": 0.9950846531949754,
40
+ "eval_ET_f1": 0.9974059662775615,
41
+ "eval_TE_f1": 0.9942763695829926,
42
+ "eval_loss": 0.021830907091498375,
43
+ "eval_overall_accuracy": 0.9975506656658634,
44
+ "eval_overall_f1": 0.9951538374845036,
45
+ "eval_overall_precision": 0.9948174853537629,
46
+ "eval_overall_recall": 0.9954904171364148,
47
+ "eval_runtime": 58.5084,
48
+ "eval_samples_per_second": 53.514,
49
+ "eval_steps_per_second": 3.35,
50
+ "step": 3522
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "learning_rate": 1.4e-05,
55
+ "loss": 0.003,
56
+ "step": 5283
57
+ },
58
+ {
59
+ "epoch": 3.0,
60
+ "eval_EG_f1": 0.9939989088925258,
61
+ "eval_ET_f1": 0.9987063389391979,
62
+ "eval_TE_f1": 0.9942638623326959,
63
+ "eval_loss": 0.022889936342835426,
64
+ "eval_overall_accuracy": 0.9971556117410026,
65
+ "eval_overall_f1": 0.9949284345768061,
66
+ "eval_overall_precision": 0.9945921586300135,
67
+ "eval_overall_recall": 0.9952649379932357,
68
+ "eval_runtime": 57.8629,
69
+ "eval_samples_per_second": 54.111,
70
+ "eval_steps_per_second": 3.387,
71
+ "step": 5283
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "learning_rate": 1.2e-05,
76
+ "loss": 0.002,
77
+ "step": 7044
78
+ },
79
+ {
80
+ "epoch": 4.0,
81
+ "eval_EG_f1": 0.9961977186311787,
82
+ "eval_ET_f1": 0.9987046632124352,
83
+ "eval_TE_f1": 0.9958847736625515,
84
+ "eval_loss": 0.017046082764863968,
85
+ "eval_overall_accuracy": 0.9982222573381266,
86
+ "eval_overall_f1": 0.9965054672528464,
87
+ "eval_overall_precision": 0.9963931469792606,
88
+ "eval_overall_recall": 0.9966178128523112,
89
+ "eval_runtime": 57.0371,
90
+ "eval_samples_per_second": 54.894,
91
+ "eval_steps_per_second": 3.436,
92
+ "step": 7044
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "learning_rate": 1e-05,
97
+ "loss": 0.0013,
98
+ "step": 8805
99
+ },
100
+ {
101
+ "epoch": 5.0,
102
+ "eval_EG_f1": 0.9970068027210883,
103
+ "eval_ET_f1": 0.9987046632124352,
104
+ "eval_TE_f1": 0.9961643835616438,
105
+ "eval_loss": 0.011512575671076775,
106
+ "eval_overall_accuracy": 0.9984987950855292,
107
+ "eval_overall_f1": 0.9969556883526891,
108
+ "eval_overall_precision": 0.9970681100586378,
109
+ "eval_overall_recall": 0.9968432919954904,
110
+ "eval_runtime": 57.8352,
111
+ "eval_samples_per_second": 54.137,
112
+ "eval_steps_per_second": 3.389,
113
+ "step": 8805
114
+ },
115
+ {
116
+ "epoch": 6.0,
117
+ "learning_rate": 8.000000000000001e-06,
118
+ "loss": 0.0015,
119
+ "step": 10566
120
+ },
121
+ {
122
+ "epoch": 6.0,
123
+ "eval_EG_f1": 0.9948271167982575,
124
+ "eval_ET_f1": 0.9980557355800389,
125
+ "eval_TE_f1": 0.9948073244055754,
126
+ "eval_loss": 0.01284080371260643,
127
+ "eval_overall_accuracy": 0.9979457195907241,
128
+ "eval_overall_f1": 0.9953802816901409,
129
+ "eval_overall_precision": 0.9948198198198198,
130
+ "eval_overall_recall": 0.9959413754227734,
131
+ "eval_runtime": 57.2307,
132
+ "eval_samples_per_second": 54.708,
133
+ "eval_steps_per_second": 3.425,
134
+ "step": 10566
135
+ },
136
+ {
137
+ "epoch": 7.0,
138
+ "learning_rate": 6e-06,
139
+ "loss": 0.0003,
140
+ "step": 12327
141
+ },
142
+ {
143
+ "epoch": 7.0,
144
+ "eval_EG_f1": 0.9953615279672577,
145
+ "eval_ET_f1": 0.9993527508090615,
146
+ "eval_TE_f1": 0.9959049959049959,
147
+ "eval_loss": 0.021282300353050232,
148
+ "eval_overall_accuracy": 0.9981037411606685,
149
+ "eval_overall_f1": 0.9962808520229911,
150
+ "eval_overall_precision": 0.9959441189725101,
151
+ "eval_overall_recall": 0.9966178128523112,
152
+ "eval_runtime": 57.2264,
153
+ "eval_samples_per_second": 54.713,
154
+ "eval_steps_per_second": 3.425,
155
+ "step": 12327
156
+ },
157
+ {
158
+ "epoch": 8.0,
159
+ "learning_rate": 4.000000000000001e-06,
160
+ "loss": 0.0005,
161
+ "step": 14088
162
+ },
163
+ {
164
+ "epoch": 8.0,
165
+ "eval_EG_f1": 0.9961852861035423,
166
+ "eval_ET_f1": 0.9993527508090615,
167
+ "eval_TE_f1": 0.9961706783369804,
168
+ "eval_loss": 0.01351018063724041,
169
+ "eval_overall_accuracy": 0.9983802789080709,
170
+ "eval_overall_f1": 0.9967309209784693,
171
+ "eval_overall_precision": 0.9966185752930569,
172
+ "eval_overall_recall": 0.9968432919954904,
173
+ "eval_runtime": 57.0797,
174
+ "eval_samples_per_second": 54.853,
175
+ "eval_steps_per_second": 3.434,
176
+ "step": 14088
177
+ },
178
+ {
179
+ "epoch": 9.0,
180
+ "learning_rate": 2.0000000000000003e-06,
181
+ "loss": 0.0002,
182
+ "step": 15849
183
+ },
184
+ {
185
+ "epoch": 9.0,
186
+ "eval_EG_f1": 0.9972781709308656,
187
+ "eval_ET_f1": 0.9993527508090615,
188
+ "eval_TE_f1": 0.9969871268145714,
189
+ "eval_loss": 0.014631124213337898,
190
+ "eval_overall_accuracy": 0.9986568166554735,
191
+ "eval_overall_f1": 0.9975197294250282,
192
+ "eval_overall_precision": 0.9975197294250282,
193
+ "eval_overall_recall": 0.9975197294250282,
194
+ "eval_runtime": 57.0354,
195
+ "eval_samples_per_second": 54.896,
196
+ "eval_steps_per_second": 3.436,
197
+ "step": 15849
198
+ },
199
+ {
200
+ "epoch": 10.0,
201
+ "learning_rate": 0.0,
202
+ "loss": 0.0001,
203
+ "step": 17610
204
+ },
205
+ {
206
+ "epoch": 10.0,
207
+ "eval_EG_f1": 0.9970051728832018,
208
+ "eval_ET_f1": 0.9993527508090615,
209
+ "eval_TE_f1": 0.9967141292442497,
210
+ "eval_loss": 0.014984946697950363,
211
+ "eval_overall_accuracy": 0.9984987950855292,
212
+ "eval_overall_f1": 0.9972942502818489,
213
+ "eval_overall_precision": 0.9972942502818489,
214
+ "eval_overall_recall": 0.9972942502818489,
215
+ "eval_runtime": 56.8354,
216
+ "eval_samples_per_second": 55.089,
217
+ "eval_steps_per_second": 3.449,
218
+ "step": 17610
219
+ }
220
+ ],
221
+ "max_steps": 17610,
222
+ "num_train_epochs": 10,
223
+ "total_flos": 3085811947058688.0,
224
+ "trial_name": null,
225
+ "trial_params": null
226
+ }