mirikwa commited on
Commit
dfd2099
1 Parent(s): f326f2a

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +247 -0
trainer_state.json ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 11815,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.42,
12
+ "learning_rate": 1.9153618281845116e-05,
13
+ "loss": 0.0365,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.42,
18
+ "eval_EG_f1": 0.9898869720404521,
19
+ "eval_ET_f1": 0.9967868618350589,
20
+ "eval_TE_f1": 0.988691822402095,
21
+ "eval_loss": 0.0057431175373494625,
22
+ "eval_overall_accuracy": 0.998336295769641,
23
+ "eval_overall_f1": 0.9917886469118172,
24
+ "eval_overall_precision": 0.9914346895074947,
25
+ "eval_overall_recall": 0.9921428571428571,
26
+ "eval_runtime": 105.4808,
27
+ "eval_samples_per_second": 39.818,
28
+ "eval_steps_per_second": 2.493,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 0.85,
33
+ "learning_rate": 1.8307236563690226e-05,
34
+ "loss": 0.0079,
35
+ "step": 2000
36
+ },
37
+ {
38
+ "epoch": 0.85,
39
+ "eval_EG_f1": 0.9944080904223678,
40
+ "eval_ET_f1": 0.9988095238095238,
41
+ "eval_TE_f1": 0.9936957297490187,
42
+ "eval_loss": 0.003063528100028634,
43
+ "eval_overall_accuracy": 0.9991894774262353,
44
+ "eval_overall_f1": 0.9956369982547992,
45
+ "eval_overall_precision": 0.995163336504916,
46
+ "eval_overall_recall": 0.9961111111111111,
47
+ "eval_runtime": 103.5359,
48
+ "eval_samples_per_second": 40.566,
49
+ "eval_steps_per_second": 2.54,
50
+ "step": 2000
51
+ },
52
+ {
53
+ "epoch": 1.27,
54
+ "learning_rate": 1.7460854845535337e-05,
55
+ "loss": 0.0043,
56
+ "step": 3000
57
+ },
58
+ {
59
+ "epoch": 1.27,
60
+ "eval_EG_f1": 0.997381575815282,
61
+ "eval_ET_f1": 0.9983337300642704,
62
+ "eval_TE_f1": 0.9963073257891601,
63
+ "eval_loss": 0.0027184109203517437,
64
+ "eval_overall_accuracy": 0.999431212228937,
65
+ "eval_overall_f1": 0.9973411643319179,
66
+ "eval_overall_precision": 0.997380744503532,
67
+ "eval_overall_recall": 0.9973015873015874,
68
+ "eval_runtime": 102.7681,
69
+ "eval_samples_per_second": 40.869,
70
+ "eval_steps_per_second": 2.559,
71
+ "step": 3000
72
+ },
73
+ {
74
+ "epoch": 1.69,
75
+ "learning_rate": 1.661447312738045e-05,
76
+ "loss": 0.0021,
77
+ "step": 4000
78
+ },
79
+ {
80
+ "epoch": 1.69,
81
+ "eval_EG_f1": 0.9942870745060699,
82
+ "eval_ET_f1": 0.9983333333333333,
83
+ "eval_TE_f1": 0.9942857142857143,
84
+ "eval_loss": 0.00385509361512959,
85
+ "eval_overall_accuracy": 0.9992179168147884,
86
+ "eval_overall_f1": 0.9956352670422981,
87
+ "eval_overall_precision": 0.9955562609109665,
88
+ "eval_overall_recall": 0.9957142857142857,
89
+ "eval_runtime": 103.0813,
90
+ "eval_samples_per_second": 40.745,
91
+ "eval_steps_per_second": 2.551,
92
+ "step": 4000
93
+ },
94
+ {
95
+ "epoch": 2.12,
96
+ "learning_rate": 1.5768091409225562e-05,
97
+ "loss": 0.002,
98
+ "step": 5000
99
+ },
100
+ {
101
+ "epoch": 2.12,
102
+ "eval_EG_f1": 0.9958348208972985,
103
+ "eval_ET_f1": 0.9990476190476191,
104
+ "eval_TE_f1": 0.9957142857142857,
105
+ "eval_loss": 0.0032494200859218836,
106
+ "eval_overall_accuracy": 0.9993174546747245,
107
+ "eval_overall_f1": 0.996865452525493,
108
+ "eval_overall_precision": 0.9967468063159565,
109
+ "eval_overall_recall": 0.996984126984127,
110
+ "eval_runtime": 102.2205,
111
+ "eval_samples_per_second": 41.088,
112
+ "eval_steps_per_second": 2.573,
113
+ "step": 5000
114
+ },
115
+ {
116
+ "epoch": 2.54,
117
+ "learning_rate": 1.4921709691070674e-05,
118
+ "loss": 0.0013,
119
+ "step": 6000
120
+ },
121
+ {
122
+ "epoch": 2.54,
123
+ "eval_EG_f1": 0.9952403617325083,
124
+ "eval_ET_f1": 0.998928698964409,
125
+ "eval_TE_f1": 0.994880342897964,
126
+ "eval_loss": 0.003066167002543807,
127
+ "eval_overall_accuracy": 0.9992605758976182,
128
+ "eval_overall_f1": 0.9963497857482939,
129
+ "eval_overall_precision": 0.9961916851793081,
130
+ "eval_overall_recall": 0.9965079365079365,
131
+ "eval_runtime": 102.0329,
132
+ "eval_samples_per_second": 41.163,
133
+ "eval_steps_per_second": 2.578,
134
+ "step": 6000
135
+ },
136
+ {
137
+ "epoch": 2.96,
138
+ "learning_rate": 1.4075327972915787e-05,
139
+ "loss": 0.002,
140
+ "step": 7000
141
+ },
142
+ {
143
+ "epoch": 2.96,
144
+ "eval_EG_f1": 0.9971428571428571,
145
+ "eval_ET_f1": 0.9990476190476191,
146
+ "eval_TE_f1": 0.9971428571428571,
147
+ "eval_loss": 0.00179725990165025,
148
+ "eval_overall_accuracy": 0.9995591894774263,
149
+ "eval_overall_f1": 0.9977777777777778,
150
+ "eval_overall_precision": 0.9977777777777778,
151
+ "eval_overall_recall": 0.9977777777777778,
152
+ "eval_runtime": 104.5567,
153
+ "eval_samples_per_second": 40.17,
154
+ "eval_steps_per_second": 2.515,
155
+ "step": 7000
156
+ },
157
+ {
158
+ "epoch": 3.39,
159
+ "learning_rate": 1.3228946254760899e-05,
160
+ "loss": 0.0009,
161
+ "step": 8000
162
+ },
163
+ {
164
+ "epoch": 3.39,
165
+ "eval_EG_f1": 0.9976196143775291,
166
+ "eval_ET_f1": 0.998690632067611,
167
+ "eval_TE_f1": 0.996904761904762,
168
+ "eval_loss": 0.0014693811535835266,
169
+ "eval_overall_accuracy": 0.999701386420192,
170
+ "eval_overall_f1": 0.9977383644804191,
171
+ "eval_overall_precision": 0.9976196143775291,
172
+ "eval_overall_recall": 0.9978571428571429,
173
+ "eval_runtime": 102.0191,
174
+ "eval_samples_per_second": 41.169,
175
+ "eval_steps_per_second": 2.578,
176
+ "step": 8000
177
+ },
178
+ {
179
+ "epoch": 3.81,
180
+ "learning_rate": 1.238256453660601e-05,
181
+ "loss": 0.0008,
182
+ "step": 9000
183
+ },
184
+ {
185
+ "epoch": 3.81,
186
+ "eval_EG_f1": 0.997381575815282,
187
+ "eval_ET_f1": 0.9992857142857143,
188
+ "eval_TE_f1": 0.9973809523809524,
189
+ "eval_loss": 0.0014952768106013536,
190
+ "eval_overall_accuracy": 0.9997298258087451,
191
+ "eval_overall_f1": 0.998016030473772,
192
+ "eval_overall_precision": 0.9979368354229488,
193
+ "eval_overall_recall": 0.9980952380952381,
194
+ "eval_runtime": 102.1451,
195
+ "eval_samples_per_second": 41.118,
196
+ "eval_steps_per_second": 2.575,
197
+ "step": 9000
198
+ },
199
+ {
200
+ "epoch": 4.23,
201
+ "learning_rate": 1.1536182818451122e-05,
202
+ "loss": 0.0008,
203
+ "step": 10000
204
+ },
205
+ {
206
+ "epoch": 4.23,
207
+ "eval_EG_f1": 0.9976190476190476,
208
+ "eval_ET_f1": 0.9983333333333333,
209
+ "eval_TE_f1": 0.9972615787593762,
210
+ "eval_loss": 0.00143534317612648,
211
+ "eval_overall_accuracy": 0.9997298258087451,
212
+ "eval_overall_f1": 0.9977380054764079,
213
+ "eval_overall_precision": 0.9977776013969363,
214
+ "eval_overall_recall": 0.9976984126984128,
215
+ "eval_runtime": 103.0577,
216
+ "eval_samples_per_second": 40.754,
217
+ "eval_steps_per_second": 2.552,
218
+ "step": 10000
219
+ },
220
+ {
221
+ "epoch": 4.66,
222
+ "learning_rate": 1.0689801100296236e-05,
223
+ "loss": 0.0005,
224
+ "step": 11000
225
+ },
226
+ {
227
+ "epoch": 4.66,
228
+ "eval_EG_f1": 0.9974997023455173,
229
+ "eval_ET_f1": 0.9995238095238095,
230
+ "eval_TE_f1": 0.9973809523809524,
231
+ "eval_loss": 0.0019883255008608103,
232
+ "eval_overall_accuracy": 0.9996729470316388,
233
+ "eval_overall_f1": 0.9981348466208977,
234
+ "eval_overall_precision": 0.9981744582903405,
235
+ "eval_overall_recall": 0.9980952380952381,
236
+ "eval_runtime": 102.2987,
237
+ "eval_samples_per_second": 41.056,
238
+ "eval_steps_per_second": 2.571,
239
+ "step": 11000
240
+ }
241
+ ],
242
+ "max_steps": 23630,
243
+ "num_train_epochs": 10,
244
+ "total_flos": 2874592437250464.0,
245
+ "trial_name": null,
246
+ "trial_params": null
247
+ }