Training in progress, step 16, checkpoint
Browse files- last-checkpoint/config.json +7 -36
- last-checkpoint/model.safetensors +2 -2
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/special_tokens_map.json +4 -26
- last-checkpoint/spiece.model +2 -2
- last-checkpoint/tokenizer.json +0 -0
- last-checkpoint/tokenizer_config.json +100 -135
- last-checkpoint/trainer_state.json +55 -1507
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/config.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
6 |
"classifier_dropout": 0.0,
|
7 |
-
"d_ff":
|
8 |
"d_kv": 64,
|
9 |
-
"d_model":
|
10 |
"decoder_start_token_id": 0,
|
11 |
"dense_act_fn": "relu",
|
12 |
"dropout_rate": 0.1,
|
@@ -18,44 +18,15 @@
|
|
18 |
"layer_norm_epsilon": 1e-06,
|
19 |
"model_type": "t5",
|
20 |
"n_positions": 512,
|
21 |
-
"num_decoder_layers":
|
22 |
-
"num_heads":
|
23 |
-
"num_layers":
|
24 |
"output_past": true,
|
25 |
"pad_token_id": 0,
|
26 |
"relative_attention_max_distance": 128,
|
27 |
"relative_attention_num_buckets": 32,
|
28 |
-
"task_specific_params": {
|
29 |
-
"summarization": {
|
30 |
-
"early_stopping": true,
|
31 |
-
"length_penalty": 2.0,
|
32 |
-
"max_length": 200,
|
33 |
-
"min_length": 30,
|
34 |
-
"no_repeat_ngram_size": 3,
|
35 |
-
"num_beams": 4,
|
36 |
-
"prefix": "summarize: "
|
37 |
-
},
|
38 |
-
"translation_en_to_de": {
|
39 |
-
"early_stopping": true,
|
40 |
-
"max_length": 300,
|
41 |
-
"num_beams": 4,
|
42 |
-
"prefix": "translate English to German: "
|
43 |
-
},
|
44 |
-
"translation_en_to_fr": {
|
45 |
-
"early_stopping": true,
|
46 |
-
"max_length": 300,
|
47 |
-
"num_beams": 4,
|
48 |
-
"prefix": "translate English to French: "
|
49 |
-
},
|
50 |
-
"translation_en_to_ro": {
|
51 |
-
"early_stopping": true,
|
52 |
-
"max_length": 300,
|
53 |
-
"num_beams": 4,
|
54 |
-
"prefix": "translate English to Romanian: "
|
55 |
-
}
|
56 |
-
},
|
57 |
"torch_dtype": "float32",
|
58 |
"transformers_version": "4.42.3",
|
59 |
"use_cache": true,
|
60 |
-
"vocab_size":
|
61 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "VietAI/vit5-base",
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
6 |
"classifier_dropout": 0.0,
|
7 |
+
"d_ff": 3072,
|
8 |
"d_kv": 64,
|
9 |
+
"d_model": 768,
|
10 |
"decoder_start_token_id": 0,
|
11 |
"dense_act_fn": "relu",
|
12 |
"dropout_rate": 0.1,
|
|
|
18 |
"layer_norm_epsilon": 1e-06,
|
19 |
"model_type": "t5",
|
20 |
"n_positions": 512,
|
21 |
+
"num_decoder_layers": 12,
|
22 |
+
"num_heads": 12,
|
23 |
+
"num_layers": 12,
|
24 |
"output_past": true,
|
25 |
"pad_token_id": 0,
|
26 |
"relative_attention_max_distance": 128,
|
27 |
"relative_attention_num_buckets": 32,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
"torch_dtype": "float32",
|
29 |
"transformers_version": "4.42.3",
|
30 |
"use_cache": true,
|
31 |
+
"vocab_size": 36096
|
32 |
}
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:119eea453bd7b4029fcdef03fe0a7106f3b8552ddeebbf478f83f4f695d0bc27
|
3 |
+
size 903834408
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:097f2da6ca0542c33e534858431ad08295fb5b03025fd8e94bda6c3ada974000
|
3 |
+
size 1807824186
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fd40d17872e1d1f44fd19772092ee3ba49e037d722a9306b6de7d846efa46dc
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e37c62b1a4bff2525d39b9bf2be166315395310f82e667777248052632e706f7
|
3 |
size 1064
|
last-checkpoint/special_tokens_map.json
CHANGED
@@ -95,31 +95,9 @@
|
|
95 |
"<extra_id_92>",
|
96 |
"<extra_id_93>",
|
97 |
"<extra_id_94>",
|
98 |
-
"<extra_id_95>"
|
99 |
-
"<extra_id_96>",
|
100 |
-
"<extra_id_97>",
|
101 |
-
"<extra_id_98>",
|
102 |
-
"<extra_id_99>"
|
103 |
],
|
104 |
-
"eos_token":
|
105 |
-
|
106 |
-
|
107 |
-
"normalized": false,
|
108 |
-
"rstrip": false,
|
109 |
-
"single_word": false
|
110 |
-
},
|
111 |
-
"pad_token": {
|
112 |
-
"content": "<pad>",
|
113 |
-
"lstrip": false,
|
114 |
-
"normalized": false,
|
115 |
-
"rstrip": false,
|
116 |
-
"single_word": false
|
117 |
-
},
|
118 |
-
"unk_token": {
|
119 |
-
"content": "<unk>",
|
120 |
-
"lstrip": false,
|
121 |
-
"normalized": false,
|
122 |
-
"rstrip": false,
|
123 |
-
"single_word": false
|
124 |
-
}
|
125 |
}
|
|
|
95 |
"<extra_id_92>",
|
96 |
"<extra_id_93>",
|
97 |
"<extra_id_94>",
|
98 |
+
"<extra_id_95>"
|
|
|
|
|
|
|
|
|
99 |
],
|
100 |
+
"eos_token": "</s>",
|
101 |
+
"pad_token": "<pad>",
|
102 |
+
"unk_token": "<unk>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
}
|
last-checkpoint/spiece.model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59986b62f9f0b90edafb9b073ea7b93d21114a5841219a1ea2399ade73f729c6
|
3 |
+
size 820370
|
last-checkpoint/tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
last-checkpoint/tokenizer_config.json
CHANGED
@@ -24,39 +24,7 @@
|
|
24 |
"single_word": false,
|
25 |
"special": true
|
26 |
},
|
27 |
-
"
|
28 |
-
"content": "<extra_id_99>",
|
29 |
-
"lstrip": false,
|
30 |
-
"normalized": false,
|
31 |
-
"rstrip": false,
|
32 |
-
"single_word": false,
|
33 |
-
"special": true
|
34 |
-
},
|
35 |
-
"32001": {
|
36 |
-
"content": "<extra_id_98>",
|
37 |
-
"lstrip": false,
|
38 |
-
"normalized": false,
|
39 |
-
"rstrip": false,
|
40 |
-
"single_word": false,
|
41 |
-
"special": true
|
42 |
-
},
|
43 |
-
"32002": {
|
44 |
-
"content": "<extra_id_97>",
|
45 |
-
"lstrip": false,
|
46 |
-
"normalized": false,
|
47 |
-
"rstrip": false,
|
48 |
-
"single_word": false,
|
49 |
-
"special": true
|
50 |
-
},
|
51 |
-
"32003": {
|
52 |
-
"content": "<extra_id_96>",
|
53 |
-
"lstrip": false,
|
54 |
-
"normalized": false,
|
55 |
-
"rstrip": false,
|
56 |
-
"single_word": false,
|
57 |
-
"special": true
|
58 |
-
},
|
59 |
-
"32004": {
|
60 |
"content": "<extra_id_95>",
|
61 |
"lstrip": false,
|
62 |
"normalized": false,
|
@@ -64,7 +32,7 @@
|
|
64 |
"single_word": false,
|
65 |
"special": true
|
66 |
},
|
67 |
-
"
|
68 |
"content": "<extra_id_94>",
|
69 |
"lstrip": false,
|
70 |
"normalized": false,
|
@@ -72,7 +40,7 @@
|
|
72 |
"single_word": false,
|
73 |
"special": true
|
74 |
},
|
75 |
-
"
|
76 |
"content": "<extra_id_93>",
|
77 |
"lstrip": false,
|
78 |
"normalized": false,
|
@@ -80,7 +48,7 @@
|
|
80 |
"single_word": false,
|
81 |
"special": true
|
82 |
},
|
83 |
-
"
|
84 |
"content": "<extra_id_92>",
|
85 |
"lstrip": false,
|
86 |
"normalized": false,
|
@@ -88,7 +56,7 @@
|
|
88 |
"single_word": false,
|
89 |
"special": true
|
90 |
},
|
91 |
-
"
|
92 |
"content": "<extra_id_91>",
|
93 |
"lstrip": false,
|
94 |
"normalized": false,
|
@@ -96,7 +64,7 @@
|
|
96 |
"single_word": false,
|
97 |
"special": true
|
98 |
},
|
99 |
-
"
|
100 |
"content": "<extra_id_90>",
|
101 |
"lstrip": false,
|
102 |
"normalized": false,
|
@@ -104,7 +72,7 @@
|
|
104 |
"single_word": false,
|
105 |
"special": true
|
106 |
},
|
107 |
-
"
|
108 |
"content": "<extra_id_89>",
|
109 |
"lstrip": false,
|
110 |
"normalized": false,
|
@@ -112,7 +80,7 @@
|
|
112 |
"single_word": false,
|
113 |
"special": true
|
114 |
},
|
115 |
-
"
|
116 |
"content": "<extra_id_88>",
|
117 |
"lstrip": false,
|
118 |
"normalized": false,
|
@@ -120,7 +88,7 @@
|
|
120 |
"single_word": false,
|
121 |
"special": true
|
122 |
},
|
123 |
-
"
|
124 |
"content": "<extra_id_87>",
|
125 |
"lstrip": false,
|
126 |
"normalized": false,
|
@@ -128,7 +96,7 @@
|
|
128 |
"single_word": false,
|
129 |
"special": true
|
130 |
},
|
131 |
-
"
|
132 |
"content": "<extra_id_86>",
|
133 |
"lstrip": false,
|
134 |
"normalized": false,
|
@@ -136,7 +104,7 @@
|
|
136 |
"single_word": false,
|
137 |
"special": true
|
138 |
},
|
139 |
-
"
|
140 |
"content": "<extra_id_85>",
|
141 |
"lstrip": false,
|
142 |
"normalized": false,
|
@@ -144,7 +112,7 @@
|
|
144 |
"single_word": false,
|
145 |
"special": true
|
146 |
},
|
147 |
-
"
|
148 |
"content": "<extra_id_84>",
|
149 |
"lstrip": false,
|
150 |
"normalized": false,
|
@@ -152,7 +120,7 @@
|
|
152 |
"single_word": false,
|
153 |
"special": true
|
154 |
},
|
155 |
-
"
|
156 |
"content": "<extra_id_83>",
|
157 |
"lstrip": false,
|
158 |
"normalized": false,
|
@@ -160,7 +128,7 @@
|
|
160 |
"single_word": false,
|
161 |
"special": true
|
162 |
},
|
163 |
-
"
|
164 |
"content": "<extra_id_82>",
|
165 |
"lstrip": false,
|
166 |
"normalized": false,
|
@@ -168,7 +136,7 @@
|
|
168 |
"single_word": false,
|
169 |
"special": true
|
170 |
},
|
171 |
-
"
|
172 |
"content": "<extra_id_81>",
|
173 |
"lstrip": false,
|
174 |
"normalized": false,
|
@@ -176,7 +144,7 @@
|
|
176 |
"single_word": false,
|
177 |
"special": true
|
178 |
},
|
179 |
-
"
|
180 |
"content": "<extra_id_80>",
|
181 |
"lstrip": false,
|
182 |
"normalized": false,
|
@@ -184,7 +152,7 @@
|
|
184 |
"single_word": false,
|
185 |
"special": true
|
186 |
},
|
187 |
-
"
|
188 |
"content": "<extra_id_79>",
|
189 |
"lstrip": false,
|
190 |
"normalized": false,
|
@@ -192,7 +160,7 @@
|
|
192 |
"single_word": false,
|
193 |
"special": true
|
194 |
},
|
195 |
-
"
|
196 |
"content": "<extra_id_78>",
|
197 |
"lstrip": false,
|
198 |
"normalized": false,
|
@@ -200,7 +168,7 @@
|
|
200 |
"single_word": false,
|
201 |
"special": true
|
202 |
},
|
203 |
-
"
|
204 |
"content": "<extra_id_77>",
|
205 |
"lstrip": false,
|
206 |
"normalized": false,
|
@@ -208,7 +176,7 @@
|
|
208 |
"single_word": false,
|
209 |
"special": true
|
210 |
},
|
211 |
-
"
|
212 |
"content": "<extra_id_76>",
|
213 |
"lstrip": false,
|
214 |
"normalized": false,
|
@@ -216,7 +184,7 @@
|
|
216 |
"single_word": false,
|
217 |
"special": true
|
218 |
},
|
219 |
-
"
|
220 |
"content": "<extra_id_75>",
|
221 |
"lstrip": false,
|
222 |
"normalized": false,
|
@@ -224,7 +192,7 @@
|
|
224 |
"single_word": false,
|
225 |
"special": true
|
226 |
},
|
227 |
-
"
|
228 |
"content": "<extra_id_74>",
|
229 |
"lstrip": false,
|
230 |
"normalized": false,
|
@@ -232,7 +200,7 @@
|
|
232 |
"single_word": false,
|
233 |
"special": true
|
234 |
},
|
235 |
-
"
|
236 |
"content": "<extra_id_73>",
|
237 |
"lstrip": false,
|
238 |
"normalized": false,
|
@@ -240,7 +208,7 @@
|
|
240 |
"single_word": false,
|
241 |
"special": true
|
242 |
},
|
243 |
-
"
|
244 |
"content": "<extra_id_72>",
|
245 |
"lstrip": false,
|
246 |
"normalized": false,
|
@@ -248,7 +216,7 @@
|
|
248 |
"single_word": false,
|
249 |
"special": true
|
250 |
},
|
251 |
-
"
|
252 |
"content": "<extra_id_71>",
|
253 |
"lstrip": false,
|
254 |
"normalized": false,
|
@@ -256,7 +224,7 @@
|
|
256 |
"single_word": false,
|
257 |
"special": true
|
258 |
},
|
259 |
-
"
|
260 |
"content": "<extra_id_70>",
|
261 |
"lstrip": false,
|
262 |
"normalized": false,
|
@@ -264,7 +232,7 @@
|
|
264 |
"single_word": false,
|
265 |
"special": true
|
266 |
},
|
267 |
-
"
|
268 |
"content": "<extra_id_69>",
|
269 |
"lstrip": false,
|
270 |
"normalized": false,
|
@@ -272,7 +240,7 @@
|
|
272 |
"single_word": false,
|
273 |
"special": true
|
274 |
},
|
275 |
-
"
|
276 |
"content": "<extra_id_68>",
|
277 |
"lstrip": false,
|
278 |
"normalized": false,
|
@@ -280,7 +248,7 @@
|
|
280 |
"single_word": false,
|
281 |
"special": true
|
282 |
},
|
283 |
-
"
|
284 |
"content": "<extra_id_67>",
|
285 |
"lstrip": false,
|
286 |
"normalized": false,
|
@@ -288,7 +256,7 @@
|
|
288 |
"single_word": false,
|
289 |
"special": true
|
290 |
},
|
291 |
-
"
|
292 |
"content": "<extra_id_66>",
|
293 |
"lstrip": false,
|
294 |
"normalized": false,
|
@@ -296,7 +264,7 @@
|
|
296 |
"single_word": false,
|
297 |
"special": true
|
298 |
},
|
299 |
-
"
|
300 |
"content": "<extra_id_65>",
|
301 |
"lstrip": false,
|
302 |
"normalized": false,
|
@@ -304,7 +272,7 @@
|
|
304 |
"single_word": false,
|
305 |
"special": true
|
306 |
},
|
307 |
-
"
|
308 |
"content": "<extra_id_64>",
|
309 |
"lstrip": false,
|
310 |
"normalized": false,
|
@@ -312,7 +280,7 @@
|
|
312 |
"single_word": false,
|
313 |
"special": true
|
314 |
},
|
315 |
-
"
|
316 |
"content": "<extra_id_63>",
|
317 |
"lstrip": false,
|
318 |
"normalized": false,
|
@@ -320,7 +288,7 @@
|
|
320 |
"single_word": false,
|
321 |
"special": true
|
322 |
},
|
323 |
-
"
|
324 |
"content": "<extra_id_62>",
|
325 |
"lstrip": false,
|
326 |
"normalized": false,
|
@@ -328,7 +296,7 @@
|
|
328 |
"single_word": false,
|
329 |
"special": true
|
330 |
},
|
331 |
-
"
|
332 |
"content": "<extra_id_61>",
|
333 |
"lstrip": false,
|
334 |
"normalized": false,
|
@@ -336,7 +304,7 @@
|
|
336 |
"single_word": false,
|
337 |
"special": true
|
338 |
},
|
339 |
-
"
|
340 |
"content": "<extra_id_60>",
|
341 |
"lstrip": false,
|
342 |
"normalized": false,
|
@@ -344,7 +312,7 @@
|
|
344 |
"single_word": false,
|
345 |
"special": true
|
346 |
},
|
347 |
-
"
|
348 |
"content": "<extra_id_59>",
|
349 |
"lstrip": false,
|
350 |
"normalized": false,
|
@@ -352,7 +320,7 @@
|
|
352 |
"single_word": false,
|
353 |
"special": true
|
354 |
},
|
355 |
-
"
|
356 |
"content": "<extra_id_58>",
|
357 |
"lstrip": false,
|
358 |
"normalized": false,
|
@@ -360,7 +328,7 @@
|
|
360 |
"single_word": false,
|
361 |
"special": true
|
362 |
},
|
363 |
-
"
|
364 |
"content": "<extra_id_57>",
|
365 |
"lstrip": false,
|
366 |
"normalized": false,
|
@@ -368,7 +336,7 @@
|
|
368 |
"single_word": false,
|
369 |
"special": true
|
370 |
},
|
371 |
-
"
|
372 |
"content": "<extra_id_56>",
|
373 |
"lstrip": false,
|
374 |
"normalized": false,
|
@@ -376,7 +344,7 @@
|
|
376 |
"single_word": false,
|
377 |
"special": true
|
378 |
},
|
379 |
-
"
|
380 |
"content": "<extra_id_55>",
|
381 |
"lstrip": false,
|
382 |
"normalized": false,
|
@@ -384,7 +352,7 @@
|
|
384 |
"single_word": false,
|
385 |
"special": true
|
386 |
},
|
387 |
-
"
|
388 |
"content": "<extra_id_54>",
|
389 |
"lstrip": false,
|
390 |
"normalized": false,
|
@@ -392,7 +360,7 @@
|
|
392 |
"single_word": false,
|
393 |
"special": true
|
394 |
},
|
395 |
-
"
|
396 |
"content": "<extra_id_53>",
|
397 |
"lstrip": false,
|
398 |
"normalized": false,
|
@@ -400,7 +368,7 @@
|
|
400 |
"single_word": false,
|
401 |
"special": true
|
402 |
},
|
403 |
-
"
|
404 |
"content": "<extra_id_52>",
|
405 |
"lstrip": false,
|
406 |
"normalized": false,
|
@@ -408,7 +376,7 @@
|
|
408 |
"single_word": false,
|
409 |
"special": true
|
410 |
},
|
411 |
-
"
|
412 |
"content": "<extra_id_51>",
|
413 |
"lstrip": false,
|
414 |
"normalized": false,
|
@@ -416,7 +384,7 @@
|
|
416 |
"single_word": false,
|
417 |
"special": true
|
418 |
},
|
419 |
-
"
|
420 |
"content": "<extra_id_50>",
|
421 |
"lstrip": false,
|
422 |
"normalized": false,
|
@@ -424,7 +392,7 @@
|
|
424 |
"single_word": false,
|
425 |
"special": true
|
426 |
},
|
427 |
-
"
|
428 |
"content": "<extra_id_49>",
|
429 |
"lstrip": false,
|
430 |
"normalized": false,
|
@@ -432,7 +400,7 @@
|
|
432 |
"single_word": false,
|
433 |
"special": true
|
434 |
},
|
435 |
-
"
|
436 |
"content": "<extra_id_48>",
|
437 |
"lstrip": false,
|
438 |
"normalized": false,
|
@@ -440,7 +408,7 @@
|
|
440 |
"single_word": false,
|
441 |
"special": true
|
442 |
},
|
443 |
-
"
|
444 |
"content": "<extra_id_47>",
|
445 |
"lstrip": false,
|
446 |
"normalized": false,
|
@@ -448,7 +416,7 @@
|
|
448 |
"single_word": false,
|
449 |
"special": true
|
450 |
},
|
451 |
-
"
|
452 |
"content": "<extra_id_46>",
|
453 |
"lstrip": false,
|
454 |
"normalized": false,
|
@@ -456,7 +424,7 @@
|
|
456 |
"single_word": false,
|
457 |
"special": true
|
458 |
},
|
459 |
-
"
|
460 |
"content": "<extra_id_45>",
|
461 |
"lstrip": false,
|
462 |
"normalized": false,
|
@@ -464,7 +432,7 @@
|
|
464 |
"single_word": false,
|
465 |
"special": true
|
466 |
},
|
467 |
-
"
|
468 |
"content": "<extra_id_44>",
|
469 |
"lstrip": false,
|
470 |
"normalized": false,
|
@@ -472,7 +440,7 @@
|
|
472 |
"single_word": false,
|
473 |
"special": true
|
474 |
},
|
475 |
-
"
|
476 |
"content": "<extra_id_43>",
|
477 |
"lstrip": false,
|
478 |
"normalized": false,
|
@@ -480,7 +448,7 @@
|
|
480 |
"single_word": false,
|
481 |
"special": true
|
482 |
},
|
483 |
-
"
|
484 |
"content": "<extra_id_42>",
|
485 |
"lstrip": false,
|
486 |
"normalized": false,
|
@@ -488,7 +456,7 @@
|
|
488 |
"single_word": false,
|
489 |
"special": true
|
490 |
},
|
491 |
-
"
|
492 |
"content": "<extra_id_41>",
|
493 |
"lstrip": false,
|
494 |
"normalized": false,
|
@@ -496,7 +464,7 @@
|
|
496 |
"single_word": false,
|
497 |
"special": true
|
498 |
},
|
499 |
-
"
|
500 |
"content": "<extra_id_40>",
|
501 |
"lstrip": false,
|
502 |
"normalized": false,
|
@@ -504,7 +472,7 @@
|
|
504 |
"single_word": false,
|
505 |
"special": true
|
506 |
},
|
507 |
-
"
|
508 |
"content": "<extra_id_39>",
|
509 |
"lstrip": false,
|
510 |
"normalized": false,
|
@@ -512,7 +480,7 @@
|
|
512 |
"single_word": false,
|
513 |
"special": true
|
514 |
},
|
515 |
-
"
|
516 |
"content": "<extra_id_38>",
|
517 |
"lstrip": false,
|
518 |
"normalized": false,
|
@@ -520,7 +488,7 @@
|
|
520 |
"single_word": false,
|
521 |
"special": true
|
522 |
},
|
523 |
-
"
|
524 |
"content": "<extra_id_37>",
|
525 |
"lstrip": false,
|
526 |
"normalized": false,
|
@@ -528,7 +496,7 @@
|
|
528 |
"single_word": false,
|
529 |
"special": true
|
530 |
},
|
531 |
-
"
|
532 |
"content": "<extra_id_36>",
|
533 |
"lstrip": false,
|
534 |
"normalized": false,
|
@@ -536,7 +504,7 @@
|
|
536 |
"single_word": false,
|
537 |
"special": true
|
538 |
},
|
539 |
-
"
|
540 |
"content": "<extra_id_35>",
|
541 |
"lstrip": false,
|
542 |
"normalized": false,
|
@@ -544,7 +512,7 @@
|
|
544 |
"single_word": false,
|
545 |
"special": true
|
546 |
},
|
547 |
-
"
|
548 |
"content": "<extra_id_34>",
|
549 |
"lstrip": false,
|
550 |
"normalized": false,
|
@@ -552,7 +520,7 @@
|
|
552 |
"single_word": false,
|
553 |
"special": true
|
554 |
},
|
555 |
-
"
|
556 |
"content": "<extra_id_33>",
|
557 |
"lstrip": false,
|
558 |
"normalized": false,
|
@@ -560,7 +528,7 @@
|
|
560 |
"single_word": false,
|
561 |
"special": true
|
562 |
},
|
563 |
-
"
|
564 |
"content": "<extra_id_32>",
|
565 |
"lstrip": false,
|
566 |
"normalized": false,
|
@@ -568,7 +536,7 @@
|
|
568 |
"single_word": false,
|
569 |
"special": true
|
570 |
},
|
571 |
-
"
|
572 |
"content": "<extra_id_31>",
|
573 |
"lstrip": false,
|
574 |
"normalized": false,
|
@@ -576,7 +544,7 @@
|
|
576 |
"single_word": false,
|
577 |
"special": true
|
578 |
},
|
579 |
-
"
|
580 |
"content": "<extra_id_30>",
|
581 |
"lstrip": false,
|
582 |
"normalized": false,
|
@@ -584,7 +552,7 @@
|
|
584 |
"single_word": false,
|
585 |
"special": true
|
586 |
},
|
587 |
-
"
|
588 |
"content": "<extra_id_29>",
|
589 |
"lstrip": false,
|
590 |
"normalized": false,
|
@@ -592,7 +560,7 @@
|
|
592 |
"single_word": false,
|
593 |
"special": true
|
594 |
},
|
595 |
-
"
|
596 |
"content": "<extra_id_28>",
|
597 |
"lstrip": false,
|
598 |
"normalized": false,
|
@@ -600,7 +568,7 @@
|
|
600 |
"single_word": false,
|
601 |
"special": true
|
602 |
},
|
603 |
-
"
|
604 |
"content": "<extra_id_27>",
|
605 |
"lstrip": false,
|
606 |
"normalized": false,
|
@@ -608,7 +576,7 @@
|
|
608 |
"single_word": false,
|
609 |
"special": true
|
610 |
},
|
611 |
-
"
|
612 |
"content": "<extra_id_26>",
|
613 |
"lstrip": false,
|
614 |
"normalized": false,
|
@@ -616,7 +584,7 @@
|
|
616 |
"single_word": false,
|
617 |
"special": true
|
618 |
},
|
619 |
-
"
|
620 |
"content": "<extra_id_25>",
|
621 |
"lstrip": false,
|
622 |
"normalized": false,
|
@@ -624,7 +592,7 @@
|
|
624 |
"single_word": false,
|
625 |
"special": true
|
626 |
},
|
627 |
-
"
|
628 |
"content": "<extra_id_24>",
|
629 |
"lstrip": false,
|
630 |
"normalized": false,
|
@@ -632,7 +600,7 @@
|
|
632 |
"single_word": false,
|
633 |
"special": true
|
634 |
},
|
635 |
-
"
|
636 |
"content": "<extra_id_23>",
|
637 |
"lstrip": false,
|
638 |
"normalized": false,
|
@@ -640,7 +608,7 @@
|
|
640 |
"single_word": false,
|
641 |
"special": true
|
642 |
},
|
643 |
-
"
|
644 |
"content": "<extra_id_22>",
|
645 |
"lstrip": false,
|
646 |
"normalized": false,
|
@@ -648,7 +616,7 @@
|
|
648 |
"single_word": false,
|
649 |
"special": true
|
650 |
},
|
651 |
-
"
|
652 |
"content": "<extra_id_21>",
|
653 |
"lstrip": false,
|
654 |
"normalized": false,
|
@@ -656,7 +624,7 @@
|
|
656 |
"single_word": false,
|
657 |
"special": true
|
658 |
},
|
659 |
-
"
|
660 |
"content": "<extra_id_20>",
|
661 |
"lstrip": false,
|
662 |
"normalized": false,
|
@@ -664,7 +632,7 @@
|
|
664 |
"single_word": false,
|
665 |
"special": true
|
666 |
},
|
667 |
-
"
|
668 |
"content": "<extra_id_19>",
|
669 |
"lstrip": false,
|
670 |
"normalized": false,
|
@@ -672,7 +640,7 @@
|
|
672 |
"single_word": false,
|
673 |
"special": true
|
674 |
},
|
675 |
-
"
|
676 |
"content": "<extra_id_18>",
|
677 |
"lstrip": false,
|
678 |
"normalized": false,
|
@@ -680,7 +648,7 @@
|
|
680 |
"single_word": false,
|
681 |
"special": true
|
682 |
},
|
683 |
-
"
|
684 |
"content": "<extra_id_17>",
|
685 |
"lstrip": false,
|
686 |
"normalized": false,
|
@@ -688,7 +656,7 @@
|
|
688 |
"single_word": false,
|
689 |
"special": true
|
690 |
},
|
691 |
-
"
|
692 |
"content": "<extra_id_16>",
|
693 |
"lstrip": false,
|
694 |
"normalized": false,
|
@@ -696,7 +664,7 @@
|
|
696 |
"single_word": false,
|
697 |
"special": true
|
698 |
},
|
699 |
-
"
|
700 |
"content": "<extra_id_15>",
|
701 |
"lstrip": false,
|
702 |
"normalized": false,
|
@@ -704,7 +672,7 @@
|
|
704 |
"single_word": false,
|
705 |
"special": true
|
706 |
},
|
707 |
-
"
|
708 |
"content": "<extra_id_14>",
|
709 |
"lstrip": false,
|
710 |
"normalized": false,
|
@@ -712,7 +680,7 @@
|
|
712 |
"single_word": false,
|
713 |
"special": true
|
714 |
},
|
715 |
-
"
|
716 |
"content": "<extra_id_13>",
|
717 |
"lstrip": false,
|
718 |
"normalized": false,
|
@@ -720,7 +688,7 @@
|
|
720 |
"single_word": false,
|
721 |
"special": true
|
722 |
},
|
723 |
-
"
|
724 |
"content": "<extra_id_12>",
|
725 |
"lstrip": false,
|
726 |
"normalized": false,
|
@@ -728,7 +696,7 @@
|
|
728 |
"single_word": false,
|
729 |
"special": true
|
730 |
},
|
731 |
-
"
|
732 |
"content": "<extra_id_11>",
|
733 |
"lstrip": false,
|
734 |
"normalized": false,
|
@@ -736,7 +704,7 @@
|
|
736 |
"single_word": false,
|
737 |
"special": true
|
738 |
},
|
739 |
-
"
|
740 |
"content": "<extra_id_10>",
|
741 |
"lstrip": false,
|
742 |
"normalized": false,
|
@@ -744,7 +712,7 @@
|
|
744 |
"single_word": false,
|
745 |
"special": true
|
746 |
},
|
747 |
-
"
|
748 |
"content": "<extra_id_9>",
|
749 |
"lstrip": false,
|
750 |
"normalized": false,
|
@@ -752,7 +720,7 @@
|
|
752 |
"single_word": false,
|
753 |
"special": true
|
754 |
},
|
755 |
-
"
|
756 |
"content": "<extra_id_8>",
|
757 |
"lstrip": false,
|
758 |
"normalized": false,
|
@@ -760,7 +728,7 @@
|
|
760 |
"single_word": false,
|
761 |
"special": true
|
762 |
},
|
763 |
-
"
|
764 |
"content": "<extra_id_7>",
|
765 |
"lstrip": false,
|
766 |
"normalized": false,
|
@@ -768,7 +736,7 @@
|
|
768 |
"single_word": false,
|
769 |
"special": true
|
770 |
},
|
771 |
-
"
|
772 |
"content": "<extra_id_6>",
|
773 |
"lstrip": false,
|
774 |
"normalized": false,
|
@@ -776,7 +744,7 @@
|
|
776 |
"single_word": false,
|
777 |
"special": true
|
778 |
},
|
779 |
-
"
|
780 |
"content": "<extra_id_5>",
|
781 |
"lstrip": false,
|
782 |
"normalized": false,
|
@@ -784,7 +752,7 @@
|
|
784 |
"single_word": false,
|
785 |
"special": true
|
786 |
},
|
787 |
-
"
|
788 |
"content": "<extra_id_4>",
|
789 |
"lstrip": false,
|
790 |
"normalized": false,
|
@@ -792,7 +760,7 @@
|
|
792 |
"single_word": false,
|
793 |
"special": true
|
794 |
},
|
795 |
-
"
|
796 |
"content": "<extra_id_3>",
|
797 |
"lstrip": false,
|
798 |
"normalized": false,
|
@@ -800,7 +768,7 @@
|
|
800 |
"single_word": false,
|
801 |
"special": true
|
802 |
},
|
803 |
-
"
|
804 |
"content": "<extra_id_2>",
|
805 |
"lstrip": false,
|
806 |
"normalized": false,
|
@@ -808,7 +776,7 @@
|
|
808 |
"single_word": false,
|
809 |
"special": true
|
810 |
},
|
811 |
-
"
|
812 |
"content": "<extra_id_1>",
|
813 |
"lstrip": false,
|
814 |
"normalized": false,
|
@@ -816,7 +784,7 @@
|
|
816 |
"single_word": false,
|
817 |
"special": true
|
818 |
},
|
819 |
-
"
|
820 |
"content": "<extra_id_0>",
|
821 |
"lstrip": false,
|
822 |
"normalized": false,
|
@@ -921,17 +889,14 @@
|
|
921 |
"<extra_id_92>",
|
922 |
"<extra_id_93>",
|
923 |
"<extra_id_94>",
|
924 |
-
"<extra_id_95>"
|
925 |
-
"<extra_id_96>",
|
926 |
-
"<extra_id_97>",
|
927 |
-
"<extra_id_98>",
|
928 |
-
"<extra_id_99>"
|
929 |
],
|
930 |
"clean_up_tokenization_spaces": true,
|
931 |
"eos_token": "</s>",
|
932 |
-
"extra_ids":
|
933 |
-
"model_max_length":
|
934 |
"pad_token": "<pad>",
|
|
|
935 |
"tokenizer_class": "T5Tokenizer",
|
936 |
"unk_token": "<unk>"
|
937 |
}
|
|
|
24 |
"single_word": false,
|
25 |
"special": true
|
26 |
},
|
27 |
+
"36000": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
"content": "<extra_id_95>",
|
29 |
"lstrip": false,
|
30 |
"normalized": false,
|
|
|
32 |
"single_word": false,
|
33 |
"special": true
|
34 |
},
|
35 |
+
"36001": {
|
36 |
"content": "<extra_id_94>",
|
37 |
"lstrip": false,
|
38 |
"normalized": false,
|
|
|
40 |
"single_word": false,
|
41 |
"special": true
|
42 |
},
|
43 |
+
"36002": {
|
44 |
"content": "<extra_id_93>",
|
45 |
"lstrip": false,
|
46 |
"normalized": false,
|
|
|
48 |
"single_word": false,
|
49 |
"special": true
|
50 |
},
|
51 |
+
"36003": {
|
52 |
"content": "<extra_id_92>",
|
53 |
"lstrip": false,
|
54 |
"normalized": false,
|
|
|
56 |
"single_word": false,
|
57 |
"special": true
|
58 |
},
|
59 |
+
"36004": {
|
60 |
"content": "<extra_id_91>",
|
61 |
"lstrip": false,
|
62 |
"normalized": false,
|
|
|
64 |
"single_word": false,
|
65 |
"special": true
|
66 |
},
|
67 |
+
"36005": {
|
68 |
"content": "<extra_id_90>",
|
69 |
"lstrip": false,
|
70 |
"normalized": false,
|
|
|
72 |
"single_word": false,
|
73 |
"special": true
|
74 |
},
|
75 |
+
"36006": {
|
76 |
"content": "<extra_id_89>",
|
77 |
"lstrip": false,
|
78 |
"normalized": false,
|
|
|
80 |
"single_word": false,
|
81 |
"special": true
|
82 |
},
|
83 |
+
"36007": {
|
84 |
"content": "<extra_id_88>",
|
85 |
"lstrip": false,
|
86 |
"normalized": false,
|
|
|
88 |
"single_word": false,
|
89 |
"special": true
|
90 |
},
|
91 |
+
"36008": {
|
92 |
"content": "<extra_id_87>",
|
93 |
"lstrip": false,
|
94 |
"normalized": false,
|
|
|
96 |
"single_word": false,
|
97 |
"special": true
|
98 |
},
|
99 |
+
"36009": {
|
100 |
"content": "<extra_id_86>",
|
101 |
"lstrip": false,
|
102 |
"normalized": false,
|
|
|
104 |
"single_word": false,
|
105 |
"special": true
|
106 |
},
|
107 |
+
"36010": {
|
108 |
"content": "<extra_id_85>",
|
109 |
"lstrip": false,
|
110 |
"normalized": false,
|
|
|
112 |
"single_word": false,
|
113 |
"special": true
|
114 |
},
|
115 |
+
"36011": {
|
116 |
"content": "<extra_id_84>",
|
117 |
"lstrip": false,
|
118 |
"normalized": false,
|
|
|
120 |
"single_word": false,
|
121 |
"special": true
|
122 |
},
|
123 |
+
"36012": {
|
124 |
"content": "<extra_id_83>",
|
125 |
"lstrip": false,
|
126 |
"normalized": false,
|
|
|
128 |
"single_word": false,
|
129 |
"special": true
|
130 |
},
|
131 |
+
"36013": {
|
132 |
"content": "<extra_id_82>",
|
133 |
"lstrip": false,
|
134 |
"normalized": false,
|
|
|
136 |
"single_word": false,
|
137 |
"special": true
|
138 |
},
|
139 |
+
"36014": {
|
140 |
"content": "<extra_id_81>",
|
141 |
"lstrip": false,
|
142 |
"normalized": false,
|
|
|
144 |
"single_word": false,
|
145 |
"special": true
|
146 |
},
|
147 |
+
"36015": {
|
148 |
"content": "<extra_id_80>",
|
149 |
"lstrip": false,
|
150 |
"normalized": false,
|
|
|
152 |
"single_word": false,
|
153 |
"special": true
|
154 |
},
|
155 |
+
"36016": {
|
156 |
"content": "<extra_id_79>",
|
157 |
"lstrip": false,
|
158 |
"normalized": false,
|
|
|
160 |
"single_word": false,
|
161 |
"special": true
|
162 |
},
|
163 |
+
"36017": {
|
164 |
"content": "<extra_id_78>",
|
165 |
"lstrip": false,
|
166 |
"normalized": false,
|
|
|
168 |
"single_word": false,
|
169 |
"special": true
|
170 |
},
|
171 |
+
"36018": {
|
172 |
"content": "<extra_id_77>",
|
173 |
"lstrip": false,
|
174 |
"normalized": false,
|
|
|
176 |
"single_word": false,
|
177 |
"special": true
|
178 |
},
|
179 |
+
"36019": {
|
180 |
"content": "<extra_id_76>",
|
181 |
"lstrip": false,
|
182 |
"normalized": false,
|
|
|
184 |
"single_word": false,
|
185 |
"special": true
|
186 |
},
|
187 |
+
"36020": {
|
188 |
"content": "<extra_id_75>",
|
189 |
"lstrip": false,
|
190 |
"normalized": false,
|
|
|
192 |
"single_word": false,
|
193 |
"special": true
|
194 |
},
|
195 |
+
"36021": {
|
196 |
"content": "<extra_id_74>",
|
197 |
"lstrip": false,
|
198 |
"normalized": false,
|
|
|
200 |
"single_word": false,
|
201 |
"special": true
|
202 |
},
|
203 |
+
"36022": {
|
204 |
"content": "<extra_id_73>",
|
205 |
"lstrip": false,
|
206 |
"normalized": false,
|
|
|
208 |
"single_word": false,
|
209 |
"special": true
|
210 |
},
|
211 |
+
"36023": {
|
212 |
"content": "<extra_id_72>",
|
213 |
"lstrip": false,
|
214 |
"normalized": false,
|
|
|
216 |
"single_word": false,
|
217 |
"special": true
|
218 |
},
|
219 |
+
"36024": {
|
220 |
"content": "<extra_id_71>",
|
221 |
"lstrip": false,
|
222 |
"normalized": false,
|
|
|
224 |
"single_word": false,
|
225 |
"special": true
|
226 |
},
|
227 |
+
"36025": {
|
228 |
"content": "<extra_id_70>",
|
229 |
"lstrip": false,
|
230 |
"normalized": false,
|
|
|
232 |
"single_word": false,
|
233 |
"special": true
|
234 |
},
|
235 |
+
"36026": {
|
236 |
"content": "<extra_id_69>",
|
237 |
"lstrip": false,
|
238 |
"normalized": false,
|
|
|
240 |
"single_word": false,
|
241 |
"special": true
|
242 |
},
|
243 |
+
"36027": {
|
244 |
"content": "<extra_id_68>",
|
245 |
"lstrip": false,
|
246 |
"normalized": false,
|
|
|
248 |
"single_word": false,
|
249 |
"special": true
|
250 |
},
|
251 |
+
"36028": {
|
252 |
"content": "<extra_id_67>",
|
253 |
"lstrip": false,
|
254 |
"normalized": false,
|
|
|
256 |
"single_word": false,
|
257 |
"special": true
|
258 |
},
|
259 |
+
"36029": {
|
260 |
"content": "<extra_id_66>",
|
261 |
"lstrip": false,
|
262 |
"normalized": false,
|
|
|
264 |
"single_word": false,
|
265 |
"special": true
|
266 |
},
|
267 |
+
"36030": {
|
268 |
"content": "<extra_id_65>",
|
269 |
"lstrip": false,
|
270 |
"normalized": false,
|
|
|
272 |
"single_word": false,
|
273 |
"special": true
|
274 |
},
|
275 |
+
"36031": {
|
276 |
"content": "<extra_id_64>",
|
277 |
"lstrip": false,
|
278 |
"normalized": false,
|
|
|
280 |
"single_word": false,
|
281 |
"special": true
|
282 |
},
|
283 |
+
"36032": {
|
284 |
"content": "<extra_id_63>",
|
285 |
"lstrip": false,
|
286 |
"normalized": false,
|
|
|
288 |
"single_word": false,
|
289 |
"special": true
|
290 |
},
|
291 |
+
"36033": {
|
292 |
"content": "<extra_id_62>",
|
293 |
"lstrip": false,
|
294 |
"normalized": false,
|
|
|
296 |
"single_word": false,
|
297 |
"special": true
|
298 |
},
|
299 |
+
"36034": {
|
300 |
"content": "<extra_id_61>",
|
301 |
"lstrip": false,
|
302 |
"normalized": false,
|
|
|
304 |
"single_word": false,
|
305 |
"special": true
|
306 |
},
|
307 |
+
"36035": {
|
308 |
"content": "<extra_id_60>",
|
309 |
"lstrip": false,
|
310 |
"normalized": false,
|
|
|
312 |
"single_word": false,
|
313 |
"special": true
|
314 |
},
|
315 |
+
"36036": {
|
316 |
"content": "<extra_id_59>",
|
317 |
"lstrip": false,
|
318 |
"normalized": false,
|
|
|
320 |
"single_word": false,
|
321 |
"special": true
|
322 |
},
|
323 |
+
"36037": {
|
324 |
"content": "<extra_id_58>",
|
325 |
"lstrip": false,
|
326 |
"normalized": false,
|
|
|
328 |
"single_word": false,
|
329 |
"special": true
|
330 |
},
|
331 |
+
"36038": {
|
332 |
"content": "<extra_id_57>",
|
333 |
"lstrip": false,
|
334 |
"normalized": false,
|
|
|
336 |
"single_word": false,
|
337 |
"special": true
|
338 |
},
|
339 |
+
"36039": {
|
340 |
"content": "<extra_id_56>",
|
341 |
"lstrip": false,
|
342 |
"normalized": false,
|
|
|
344 |
"single_word": false,
|
345 |
"special": true
|
346 |
},
|
347 |
+
"36040": {
|
348 |
"content": "<extra_id_55>",
|
349 |
"lstrip": false,
|
350 |
"normalized": false,
|
|
|
352 |
"single_word": false,
|
353 |
"special": true
|
354 |
},
|
355 |
+
"36041": {
|
356 |
"content": "<extra_id_54>",
|
357 |
"lstrip": false,
|
358 |
"normalized": false,
|
|
|
360 |
"single_word": false,
|
361 |
"special": true
|
362 |
},
|
363 |
+
"36042": {
|
364 |
"content": "<extra_id_53>",
|
365 |
"lstrip": false,
|
366 |
"normalized": false,
|
|
|
368 |
"single_word": false,
|
369 |
"special": true
|
370 |
},
|
371 |
+
"36043": {
|
372 |
"content": "<extra_id_52>",
|
373 |
"lstrip": false,
|
374 |
"normalized": false,
|
|
|
376 |
"single_word": false,
|
377 |
"special": true
|
378 |
},
|
379 |
+
"36044": {
|
380 |
"content": "<extra_id_51>",
|
381 |
"lstrip": false,
|
382 |
"normalized": false,
|
|
|
384 |
"single_word": false,
|
385 |
"special": true
|
386 |
},
|
387 |
+
"36045": {
|
388 |
"content": "<extra_id_50>",
|
389 |
"lstrip": false,
|
390 |
"normalized": false,
|
|
|
392 |
"single_word": false,
|
393 |
"special": true
|
394 |
},
|
395 |
+
"36046": {
|
396 |
"content": "<extra_id_49>",
|
397 |
"lstrip": false,
|
398 |
"normalized": false,
|
|
|
400 |
"single_word": false,
|
401 |
"special": true
|
402 |
},
|
403 |
+
"36047": {
|
404 |
"content": "<extra_id_48>",
|
405 |
"lstrip": false,
|
406 |
"normalized": false,
|
|
|
408 |
"single_word": false,
|
409 |
"special": true
|
410 |
},
|
411 |
+
"36048": {
|
412 |
"content": "<extra_id_47>",
|
413 |
"lstrip": false,
|
414 |
"normalized": false,
|
|
|
416 |
"single_word": false,
|
417 |
"special": true
|
418 |
},
|
419 |
+
"36049": {
|
420 |
"content": "<extra_id_46>",
|
421 |
"lstrip": false,
|
422 |
"normalized": false,
|
|
|
424 |
"single_word": false,
|
425 |
"special": true
|
426 |
},
|
427 |
+
"36050": {
|
428 |
"content": "<extra_id_45>",
|
429 |
"lstrip": false,
|
430 |
"normalized": false,
|
|
|
432 |
"single_word": false,
|
433 |
"special": true
|
434 |
},
|
435 |
+
"36051": {
|
436 |
"content": "<extra_id_44>",
|
437 |
"lstrip": false,
|
438 |
"normalized": false,
|
|
|
440 |
"single_word": false,
|
441 |
"special": true
|
442 |
},
|
443 |
+
"36052": {
|
444 |
"content": "<extra_id_43>",
|
445 |
"lstrip": false,
|
446 |
"normalized": false,
|
|
|
448 |
"single_word": false,
|
449 |
"special": true
|
450 |
},
|
451 |
+
"36053": {
|
452 |
"content": "<extra_id_42>",
|
453 |
"lstrip": false,
|
454 |
"normalized": false,
|
|
|
456 |
"single_word": false,
|
457 |
"special": true
|
458 |
},
|
459 |
+
"36054": {
|
460 |
"content": "<extra_id_41>",
|
461 |
"lstrip": false,
|
462 |
"normalized": false,
|
|
|
464 |
"single_word": false,
|
465 |
"special": true
|
466 |
},
|
467 |
+
"36055": {
|
468 |
"content": "<extra_id_40>",
|
469 |
"lstrip": false,
|
470 |
"normalized": false,
|
|
|
472 |
"single_word": false,
|
473 |
"special": true
|
474 |
},
|
475 |
+
"36056": {
|
476 |
"content": "<extra_id_39>",
|
477 |
"lstrip": false,
|
478 |
"normalized": false,
|
|
|
480 |
"single_word": false,
|
481 |
"special": true
|
482 |
},
|
483 |
+
"36057": {
|
484 |
"content": "<extra_id_38>",
|
485 |
"lstrip": false,
|
486 |
"normalized": false,
|
|
|
488 |
"single_word": false,
|
489 |
"special": true
|
490 |
},
|
491 |
+
"36058": {
|
492 |
"content": "<extra_id_37>",
|
493 |
"lstrip": false,
|
494 |
"normalized": false,
|
|
|
496 |
"single_word": false,
|
497 |
"special": true
|
498 |
},
|
499 |
+
"36059": {
|
500 |
"content": "<extra_id_36>",
|
501 |
"lstrip": false,
|
502 |
"normalized": false,
|
|
|
504 |
"single_word": false,
|
505 |
"special": true
|
506 |
},
|
507 |
+
"36060": {
|
508 |
"content": "<extra_id_35>",
|
509 |
"lstrip": false,
|
510 |
"normalized": false,
|
|
|
512 |
"single_word": false,
|
513 |
"special": true
|
514 |
},
|
515 |
+
"36061": {
|
516 |
"content": "<extra_id_34>",
|
517 |
"lstrip": false,
|
518 |
"normalized": false,
|
|
|
520 |
"single_word": false,
|
521 |
"special": true
|
522 |
},
|
523 |
+
"36062": {
|
524 |
"content": "<extra_id_33>",
|
525 |
"lstrip": false,
|
526 |
"normalized": false,
|
|
|
528 |
"single_word": false,
|
529 |
"special": true
|
530 |
},
|
531 |
+
"36063": {
|
532 |
"content": "<extra_id_32>",
|
533 |
"lstrip": false,
|
534 |
"normalized": false,
|
|
|
536 |
"single_word": false,
|
537 |
"special": true
|
538 |
},
|
539 |
+
"36064": {
|
540 |
"content": "<extra_id_31>",
|
541 |
"lstrip": false,
|
542 |
"normalized": false,
|
|
|
544 |
"single_word": false,
|
545 |
"special": true
|
546 |
},
|
547 |
+
"36065": {
|
548 |
"content": "<extra_id_30>",
|
549 |
"lstrip": false,
|
550 |
"normalized": false,
|
|
|
552 |
"single_word": false,
|
553 |
"special": true
|
554 |
},
|
555 |
+
"36066": {
|
556 |
"content": "<extra_id_29>",
|
557 |
"lstrip": false,
|
558 |
"normalized": false,
|
|
|
560 |
"single_word": false,
|
561 |
"special": true
|
562 |
},
|
563 |
+
"36067": {
|
564 |
"content": "<extra_id_28>",
|
565 |
"lstrip": false,
|
566 |
"normalized": false,
|
|
|
568 |
"single_word": false,
|
569 |
"special": true
|
570 |
},
|
571 |
+
"36068": {
|
572 |
"content": "<extra_id_27>",
|
573 |
"lstrip": false,
|
574 |
"normalized": false,
|
|
|
576 |
"single_word": false,
|
577 |
"special": true
|
578 |
},
|
579 |
+
"36069": {
|
580 |
"content": "<extra_id_26>",
|
581 |
"lstrip": false,
|
582 |
"normalized": false,
|
|
|
584 |
"single_word": false,
|
585 |
"special": true
|
586 |
},
|
587 |
+
"36070": {
|
588 |
"content": "<extra_id_25>",
|
589 |
"lstrip": false,
|
590 |
"normalized": false,
|
|
|
592 |
"single_word": false,
|
593 |
"special": true
|
594 |
},
|
595 |
+
"36071": {
|
596 |
"content": "<extra_id_24>",
|
597 |
"lstrip": false,
|
598 |
"normalized": false,
|
|
|
600 |
"single_word": false,
|
601 |
"special": true
|
602 |
},
|
603 |
+
"36072": {
|
604 |
"content": "<extra_id_23>",
|
605 |
"lstrip": false,
|
606 |
"normalized": false,
|
|
|
608 |
"single_word": false,
|
609 |
"special": true
|
610 |
},
|
611 |
+
"36073": {
|
612 |
"content": "<extra_id_22>",
|
613 |
"lstrip": false,
|
614 |
"normalized": false,
|
|
|
616 |
"single_word": false,
|
617 |
"special": true
|
618 |
},
|
619 |
+
"36074": {
|
620 |
"content": "<extra_id_21>",
|
621 |
"lstrip": false,
|
622 |
"normalized": false,
|
|
|
624 |
"single_word": false,
|
625 |
"special": true
|
626 |
},
|
627 |
+
"36075": {
|
628 |
"content": "<extra_id_20>",
|
629 |
"lstrip": false,
|
630 |
"normalized": false,
|
|
|
632 |
"single_word": false,
|
633 |
"special": true
|
634 |
},
|
635 |
+
"36076": {
|
636 |
"content": "<extra_id_19>",
|
637 |
"lstrip": false,
|
638 |
"normalized": false,
|
|
|
640 |
"single_word": false,
|
641 |
"special": true
|
642 |
},
|
643 |
+
"36077": {
|
644 |
"content": "<extra_id_18>",
|
645 |
"lstrip": false,
|
646 |
"normalized": false,
|
|
|
648 |
"single_word": false,
|
649 |
"special": true
|
650 |
},
|
651 |
+
"36078": {
|
652 |
"content": "<extra_id_17>",
|
653 |
"lstrip": false,
|
654 |
"normalized": false,
|
|
|
656 |
"single_word": false,
|
657 |
"special": true
|
658 |
},
|
659 |
+
"36079": {
|
660 |
"content": "<extra_id_16>",
|
661 |
"lstrip": false,
|
662 |
"normalized": false,
|
|
|
664 |
"single_word": false,
|
665 |
"special": true
|
666 |
},
|
667 |
+
"36080": {
|
668 |
"content": "<extra_id_15>",
|
669 |
"lstrip": false,
|
670 |
"normalized": false,
|
|
|
672 |
"single_word": false,
|
673 |
"special": true
|
674 |
},
|
675 |
+
"36081": {
|
676 |
"content": "<extra_id_14>",
|
677 |
"lstrip": false,
|
678 |
"normalized": false,
|
|
|
680 |
"single_word": false,
|
681 |
"special": true
|
682 |
},
|
683 |
+
"36082": {
|
684 |
"content": "<extra_id_13>",
|
685 |
"lstrip": false,
|
686 |
"normalized": false,
|
|
|
688 |
"single_word": false,
|
689 |
"special": true
|
690 |
},
|
691 |
+
"36083": {
|
692 |
"content": "<extra_id_12>",
|
693 |
"lstrip": false,
|
694 |
"normalized": false,
|
|
|
696 |
"single_word": false,
|
697 |
"special": true
|
698 |
},
|
699 |
+
"36084": {
|
700 |
"content": "<extra_id_11>",
|
701 |
"lstrip": false,
|
702 |
"normalized": false,
|
|
|
704 |
"single_word": false,
|
705 |
"special": true
|
706 |
},
|
707 |
+
"36085": {
|
708 |
"content": "<extra_id_10>",
|
709 |
"lstrip": false,
|
710 |
"normalized": false,
|
|
|
712 |
"single_word": false,
|
713 |
"special": true
|
714 |
},
|
715 |
+
"36086": {
|
716 |
"content": "<extra_id_9>",
|
717 |
"lstrip": false,
|
718 |
"normalized": false,
|
|
|
720 |
"single_word": false,
|
721 |
"special": true
|
722 |
},
|
723 |
+
"36087": {
|
724 |
"content": "<extra_id_8>",
|
725 |
"lstrip": false,
|
726 |
"normalized": false,
|
|
|
728 |
"single_word": false,
|
729 |
"special": true
|
730 |
},
|
731 |
+
"36088": {
|
732 |
"content": "<extra_id_7>",
|
733 |
"lstrip": false,
|
734 |
"normalized": false,
|
|
|
736 |
"single_word": false,
|
737 |
"special": true
|
738 |
},
|
739 |
+
"36089": {
|
740 |
"content": "<extra_id_6>",
|
741 |
"lstrip": false,
|
742 |
"normalized": false,
|
|
|
744 |
"single_word": false,
|
745 |
"special": true
|
746 |
},
|
747 |
+
"36090": {
|
748 |
"content": "<extra_id_5>",
|
749 |
"lstrip": false,
|
750 |
"normalized": false,
|
|
|
752 |
"single_word": false,
|
753 |
"special": true
|
754 |
},
|
755 |
+
"36091": {
|
756 |
"content": "<extra_id_4>",
|
757 |
"lstrip": false,
|
758 |
"normalized": false,
|
|
|
760 |
"single_word": false,
|
761 |
"special": true
|
762 |
},
|
763 |
+
"36092": {
|
764 |
"content": "<extra_id_3>",
|
765 |
"lstrip": false,
|
766 |
"normalized": false,
|
|
|
768 |
"single_word": false,
|
769 |
"special": true
|
770 |
},
|
771 |
+
"36093": {
|
772 |
"content": "<extra_id_2>",
|
773 |
"lstrip": false,
|
774 |
"normalized": false,
|
|
|
776 |
"single_word": false,
|
777 |
"special": true
|
778 |
},
|
779 |
+
"36094": {
|
780 |
"content": "<extra_id_1>",
|
781 |
"lstrip": false,
|
782 |
"normalized": false,
|
|
|
784 |
"single_word": false,
|
785 |
"special": true
|
786 |
},
|
787 |
+
"36095": {
|
788 |
"content": "<extra_id_0>",
|
789 |
"lstrip": false,
|
790 |
"normalized": false,
|
|
|
889 |
"<extra_id_92>",
|
890 |
"<extra_id_93>",
|
891 |
"<extra_id_94>",
|
892 |
+
"<extra_id_95>"
|
|
|
|
|
|
|
|
|
893 |
],
|
894 |
"clean_up_tokenization_spaces": true,
|
895 |
"eos_token": "</s>",
|
896 |
+
"extra_ids": 96,
|
897 |
+
"model_max_length": 1000000000000000019884624838656,
|
898 |
"pad_token": "<pad>",
|
899 |
+
"sp_model_kwargs": {},
|
900 |
"tokenizer_class": "T5Tokenizer",
|
901 |
"unk_token": "<unk>"
|
902 |
}
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 16,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -12,1580 +12,128 @@
|
|
12 |
"epoch": 6.464646464646465e-05,
|
13 |
"grad_norm": NaN,
|
14 |
"learning_rate": 0.0,
|
15 |
-
"loss":
|
16 |
"step": 1
|
17 |
},
|
18 |
{
|
19 |
"epoch": 0.0001292929292929293,
|
20 |
"grad_norm": NaN,
|
21 |
"learning_rate": 0.0,
|
22 |
-
"loss":
|
23 |
"step": 2
|
24 |
},
|
25 |
{
|
26 |
"epoch": 0.00019393939393939395,
|
27 |
-
"grad_norm":
|
28 |
-
"learning_rate":
|
29 |
-
"loss":
|
30 |
"step": 3
|
31 |
},
|
32 |
{
|
33 |
"epoch": 0.0002585858585858586,
|
34 |
-
"grad_norm":
|
35 |
-
"learning_rate":
|
36 |
-
"loss":
|
37 |
"step": 4
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.00032323232323232324,
|
41 |
-
"grad_norm":
|
42 |
-
"learning_rate":
|
43 |
-
"loss":
|
44 |
"step": 5
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.0003878787878787879,
|
48 |
-
"grad_norm":
|
49 |
-
"learning_rate":
|
50 |
-
"loss":
|
51 |
"step": 6
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.0004525252525252525,
|
55 |
-
"grad_norm":
|
56 |
-
"learning_rate":
|
57 |
-
"loss":
|
58 |
"step": 7
|
59 |
},
|
60 |
{
|
61 |
"epoch": 0.0005171717171717172,
|
62 |
-
"grad_norm":
|
63 |
-
"learning_rate":
|
64 |
-
"loss":
|
65 |
"step": 8
|
66 |
},
|
67 |
{
|
68 |
"epoch": 0.0005818181818181818,
|
69 |
-
"grad_norm":
|
70 |
-
"learning_rate":
|
71 |
-
"loss":
|
72 |
"step": 9
|
73 |
},
|
74 |
{
|
75 |
"epoch": 0.0006464646464646465,
|
76 |
-
"grad_norm":
|
77 |
-
"learning_rate":
|
78 |
-
"loss":
|
79 |
"step": 10
|
80 |
},
|
81 |
{
|
82 |
"epoch": 0.0007111111111111111,
|
83 |
-
"grad_norm":
|
84 |
-
"learning_rate":
|
85 |
-
"loss":
|
86 |
"step": 11
|
87 |
},
|
88 |
{
|
89 |
"epoch": 0.0007757575757575758,
|
90 |
-
"grad_norm":
|
91 |
-
"learning_rate":
|
92 |
-
"loss":
|
93 |
"step": 12
|
94 |
},
|
95 |
{
|
96 |
"epoch": 0.0008404040404040404,
|
97 |
-
"grad_norm":
|
98 |
-
"learning_rate":
|
99 |
-
"loss":
|
100 |
"step": 13
|
101 |
},
|
102 |
{
|
103 |
"epoch": 0.000905050505050505,
|
104 |
-
"grad_norm":
|
105 |
-
"learning_rate":
|
106 |
-
"loss":
|
107 |
"step": 14
|
108 |
},
|
109 |
{
|
110 |
"epoch": 0.0009696969696969697,
|
111 |
-
"grad_norm":
|
112 |
-
"learning_rate":
|
113 |
-
"loss":
|
114 |
"step": 15
|
115 |
},
|
116 |
{
|
117 |
"epoch": 0.0010343434343434343,
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate":
|
120 |
-
"loss":
|
121 |
"step": 16
|
122 |
},
|
123 |
{
|
124 |
"epoch": 0.0010343434343434343,
|
125 |
-
"eval_bleu":
|
126 |
-
"eval_loss":
|
127 |
-
"eval_runtime":
|
128 |
-
"eval_samples_per_second":
|
129 |
-
"eval_steps_per_second":
|
130 |
"step": 16
|
131 |
-
},
|
132 |
-
{
|
133 |
-
"epoch": 0.0010989898989898989,
|
134 |
-
"grad_norm": NaN,
|
135 |
-
"learning_rate": 1.5483870967741936e-05,
|
136 |
-
"loss": 11.3409,
|
137 |
-
"step": 17
|
138 |
-
},
|
139 |
-
{
|
140 |
-
"epoch": 0.0011636363636363637,
|
141 |
-
"grad_norm": NaN,
|
142 |
-
"learning_rate": 1.5483870967741936e-05,
|
143 |
-
"loss": 10.8596,
|
144 |
-
"step": 18
|
145 |
-
},
|
146 |
-
{
|
147 |
-
"epoch": 0.0012282828282828282,
|
148 |
-
"grad_norm": 191.51080322265625,
|
149 |
-
"learning_rate": 1.6774193548387098e-05,
|
150 |
-
"loss": 11.4091,
|
151 |
-
"step": 19
|
152 |
-
},
|
153 |
-
{
|
154 |
-
"epoch": 0.001292929292929293,
|
155 |
-
"grad_norm": 60.890865325927734,
|
156 |
-
"learning_rate": 1.806451612903226e-05,
|
157 |
-
"loss": 11.6678,
|
158 |
-
"step": 20
|
159 |
-
},
|
160 |
-
{
|
161 |
-
"epoch": 0.0013575757575757575,
|
162 |
-
"grad_norm": 86.45341491699219,
|
163 |
-
"learning_rate": 1.935483870967742e-05,
|
164 |
-
"loss": 11.0757,
|
165 |
-
"step": 21
|
166 |
-
},
|
167 |
-
{
|
168 |
-
"epoch": 0.0014222222222222223,
|
169 |
-
"grad_norm": 157.23501586914062,
|
170 |
-
"learning_rate": 2.0645161290322582e-05,
|
171 |
-
"loss": 10.7504,
|
172 |
-
"step": 22
|
173 |
-
},
|
174 |
-
{
|
175 |
-
"epoch": 0.0014868686868686868,
|
176 |
-
"grad_norm": 63.56692123413086,
|
177 |
-
"learning_rate": 2.1935483870967744e-05,
|
178 |
-
"loss": 10.3107,
|
179 |
-
"step": 23
|
180 |
-
},
|
181 |
-
{
|
182 |
-
"epoch": 0.0015515151515151516,
|
183 |
-
"grad_norm": 134.14984130859375,
|
184 |
-
"learning_rate": 2.3225806451612906e-05,
|
185 |
-
"loss": 9.6532,
|
186 |
-
"step": 24
|
187 |
-
},
|
188 |
-
{
|
189 |
-
"epoch": 0.0016161616161616162,
|
190 |
-
"grad_norm": 58.10708999633789,
|
191 |
-
"learning_rate": 2.4516129032258064e-05,
|
192 |
-
"loss": 9.6044,
|
193 |
-
"step": 25
|
194 |
-
},
|
195 |
-
{
|
196 |
-
"epoch": 0.0016808080808080807,
|
197 |
-
"grad_norm": 66.19347381591797,
|
198 |
-
"learning_rate": 2.5806451612903226e-05,
|
199 |
-
"loss": 9.1942,
|
200 |
-
"step": 26
|
201 |
-
},
|
202 |
-
{
|
203 |
-
"epoch": 0.0017454545454545455,
|
204 |
-
"grad_norm": 60.57972717285156,
|
205 |
-
"learning_rate": 2.709677419354839e-05,
|
206 |
-
"loss": 9.2875,
|
207 |
-
"step": 27
|
208 |
-
},
|
209 |
-
{
|
210 |
-
"epoch": 0.00181010101010101,
|
211 |
-
"grad_norm": 58.872520446777344,
|
212 |
-
"learning_rate": 2.838709677419355e-05,
|
213 |
-
"loss": 7.7579,
|
214 |
-
"step": 28
|
215 |
-
},
|
216 |
-
{
|
217 |
-
"epoch": 0.0018747474747474748,
|
218 |
-
"grad_norm": 63.92591094970703,
|
219 |
-
"learning_rate": 2.967741935483871e-05,
|
220 |
-
"loss": 7.4737,
|
221 |
-
"step": 29
|
222 |
-
},
|
223 |
-
{
|
224 |
-
"epoch": 0.0019393939393939393,
|
225 |
-
"grad_norm": 59.86642837524414,
|
226 |
-
"learning_rate": 3.096774193548387e-05,
|
227 |
-
"loss": 6.8395,
|
228 |
-
"step": 30
|
229 |
-
},
|
230 |
-
{
|
231 |
-
"epoch": 0.002004040404040404,
|
232 |
-
"grad_norm": 61.897117614746094,
|
233 |
-
"learning_rate": 3.2258064516129034e-05,
|
234 |
-
"loss": 6.788,
|
235 |
-
"step": 31
|
236 |
-
},
|
237 |
-
{
|
238 |
-
"epoch": 0.0020686868686868687,
|
239 |
-
"grad_norm": 68.02660369873047,
|
240 |
-
"learning_rate": 3.3548387096774195e-05,
|
241 |
-
"loss": 6.5021,
|
242 |
-
"step": 32
|
243 |
-
},
|
244 |
-
{
|
245 |
-
"epoch": 0.0020686868686868687,
|
246 |
-
"eval_bleu": 3.747450816674144,
|
247 |
-
"eval_loss": 3.330573558807373,
|
248 |
-
"eval_runtime": 1.3643,
|
249 |
-
"eval_samples_per_second": 11.728,
|
250 |
-
"eval_steps_per_second": 2.932,
|
251 |
-
"step": 32
|
252 |
-
},
|
253 |
-
{
|
254 |
-
"epoch": 0.0021333333333333334,
|
255 |
-
"grad_norm": 59.149505615234375,
|
256 |
-
"learning_rate": 3.483870967741936e-05,
|
257 |
-
"loss": 5.7595,
|
258 |
-
"step": 33
|
259 |
-
},
|
260 |
-
{
|
261 |
-
"epoch": 0.0021979797979797978,
|
262 |
-
"grad_norm": 62.66898727416992,
|
263 |
-
"learning_rate": 3.612903225806452e-05,
|
264 |
-
"loss": 5.3139,
|
265 |
-
"step": 34
|
266 |
-
},
|
267 |
-
{
|
268 |
-
"epoch": 0.0022626262626262625,
|
269 |
-
"grad_norm": 59.711551666259766,
|
270 |
-
"learning_rate": 3.741935483870968e-05,
|
271 |
-
"loss": 5.1228,
|
272 |
-
"step": 35
|
273 |
-
},
|
274 |
-
{
|
275 |
-
"epoch": 0.0023272727272727273,
|
276 |
-
"grad_norm": 55.77004623413086,
|
277 |
-
"learning_rate": 3.870967741935484e-05,
|
278 |
-
"loss": 4.1742,
|
279 |
-
"step": 36
|
280 |
-
},
|
281 |
-
{
|
282 |
-
"epoch": 0.002391919191919192,
|
283 |
-
"grad_norm": 52.294471740722656,
|
284 |
-
"learning_rate": 4e-05,
|
285 |
-
"loss": 4.2128,
|
286 |
-
"step": 37
|
287 |
-
},
|
288 |
-
{
|
289 |
-
"epoch": 0.0024565656565656564,
|
290 |
-
"grad_norm": 51.9980583190918,
|
291 |
-
"learning_rate": 4.1290322580645165e-05,
|
292 |
-
"loss": 3.3269,
|
293 |
-
"step": 38
|
294 |
-
},
|
295 |
-
{
|
296 |
-
"epoch": 0.002521212121212121,
|
297 |
-
"grad_norm": 40.03384017944336,
|
298 |
-
"learning_rate": 4.258064516129032e-05,
|
299 |
-
"loss": 2.7555,
|
300 |
-
"step": 39
|
301 |
-
},
|
302 |
-
{
|
303 |
-
"epoch": 0.002585858585858586,
|
304 |
-
"grad_norm": 33.953269958496094,
|
305 |
-
"learning_rate": 4.387096774193549e-05,
|
306 |
-
"loss": 2.4034,
|
307 |
-
"step": 40
|
308 |
-
},
|
309 |
-
{
|
310 |
-
"epoch": 0.0026505050505050507,
|
311 |
-
"grad_norm": 29.490108489990234,
|
312 |
-
"learning_rate": 4.516129032258064e-05,
|
313 |
-
"loss": 2.1995,
|
314 |
-
"step": 41
|
315 |
-
},
|
316 |
-
{
|
317 |
-
"epoch": 0.002715151515151515,
|
318 |
-
"grad_norm": 56.527854919433594,
|
319 |
-
"learning_rate": 4.645161290322581e-05,
|
320 |
-
"loss": 1.7884,
|
321 |
-
"step": 42
|
322 |
-
},
|
323 |
-
{
|
324 |
-
"epoch": 0.00277979797979798,
|
325 |
-
"grad_norm": 21.921388626098633,
|
326 |
-
"learning_rate": 4.774193548387097e-05,
|
327 |
-
"loss": 1.8252,
|
328 |
-
"step": 43
|
329 |
-
},
|
330 |
-
{
|
331 |
-
"epoch": 0.0028444444444444446,
|
332 |
-
"grad_norm": 18.03729820251465,
|
333 |
-
"learning_rate": 4.903225806451613e-05,
|
334 |
-
"loss": 1.706,
|
335 |
-
"step": 44
|
336 |
-
},
|
337 |
-
{
|
338 |
-
"epoch": 0.002909090909090909,
|
339 |
-
"grad_norm": 11.620859146118164,
|
340 |
-
"learning_rate": 5.032258064516129e-05,
|
341 |
-
"loss": 1.4186,
|
342 |
-
"step": 45
|
343 |
-
},
|
344 |
-
{
|
345 |
-
"epoch": 0.0029737373737373737,
|
346 |
-
"grad_norm": 10.846900939941406,
|
347 |
-
"learning_rate": 5.161290322580645e-05,
|
348 |
-
"loss": 1.3585,
|
349 |
-
"step": 46
|
350 |
-
},
|
351 |
-
{
|
352 |
-
"epoch": 0.0030383838383838385,
|
353 |
-
"grad_norm": 9.786290168762207,
|
354 |
-
"learning_rate": 5.290322580645162e-05,
|
355 |
-
"loss": 1.345,
|
356 |
-
"step": 47
|
357 |
-
},
|
358 |
-
{
|
359 |
-
"epoch": 0.0031030303030303032,
|
360 |
-
"grad_norm": 27.742897033691406,
|
361 |
-
"learning_rate": 5.419354838709678e-05,
|
362 |
-
"loss": 1.1557,
|
363 |
-
"step": 48
|
364 |
-
},
|
365 |
-
{
|
366 |
-
"epoch": 0.0031030303030303032,
|
367 |
-
"eval_bleu": 0.0,
|
368 |
-
"eval_loss": 0.3357515335083008,
|
369 |
-
"eval_runtime": 1.3804,
|
370 |
-
"eval_samples_per_second": 11.591,
|
371 |
-
"eval_steps_per_second": 2.898,
|
372 |
-
"step": 48
|
373 |
-
},
|
374 |
-
{
|
375 |
-
"epoch": 0.0031676767676767676,
|
376 |
-
"grad_norm": 3.792893409729004,
|
377 |
-
"learning_rate": 5.5483870967741936e-05,
|
378 |
-
"loss": 1.0484,
|
379 |
-
"step": 49
|
380 |
-
},
|
381 |
-
{
|
382 |
-
"epoch": 0.0032323232323232323,
|
383 |
-
"grad_norm": 6.693092346191406,
|
384 |
-
"learning_rate": 5.67741935483871e-05,
|
385 |
-
"loss": 1.1635,
|
386 |
-
"step": 50
|
387 |
-
},
|
388 |
-
{
|
389 |
-
"epoch": 0.003296969696969697,
|
390 |
-
"grad_norm": 3.844968795776367,
|
391 |
-
"learning_rate": 5.8064516129032266e-05,
|
392 |
-
"loss": 1.0061,
|
393 |
-
"step": 51
|
394 |
-
},
|
395 |
-
{
|
396 |
-
"epoch": 0.0033616161616161614,
|
397 |
-
"grad_norm": 4.14786958694458,
|
398 |
-
"learning_rate": 5.935483870967742e-05,
|
399 |
-
"loss": 0.9577,
|
400 |
-
"step": 52
|
401 |
-
},
|
402 |
-
{
|
403 |
-
"epoch": 0.003426262626262626,
|
404 |
-
"grad_norm": 4.455865859985352,
|
405 |
-
"learning_rate": 6.064516129032258e-05,
|
406 |
-
"loss": 0.8501,
|
407 |
-
"step": 53
|
408 |
-
},
|
409 |
-
{
|
410 |
-
"epoch": 0.003490909090909091,
|
411 |
-
"grad_norm": 8.088818550109863,
|
412 |
-
"learning_rate": 6.193548387096774e-05,
|
413 |
-
"loss": 0.7826,
|
414 |
-
"step": 54
|
415 |
-
},
|
416 |
-
{
|
417 |
-
"epoch": 0.0035555555555555557,
|
418 |
-
"grad_norm": 3.6797592639923096,
|
419 |
-
"learning_rate": 6.32258064516129e-05,
|
420 |
-
"loss": 0.7374,
|
421 |
-
"step": 55
|
422 |
-
},
|
423 |
-
{
|
424 |
-
"epoch": 0.00362020202020202,
|
425 |
-
"grad_norm": 3.0284504890441895,
|
426 |
-
"learning_rate": 6.451612903225807e-05,
|
427 |
-
"loss": 0.6646,
|
428 |
-
"step": 56
|
429 |
-
},
|
430 |
-
{
|
431 |
-
"epoch": 0.003684848484848485,
|
432 |
-
"grad_norm": 2.8636531829833984,
|
433 |
-
"learning_rate": 6.580645161290323e-05,
|
434 |
-
"loss": 0.6316,
|
435 |
-
"step": 57
|
436 |
-
},
|
437 |
-
{
|
438 |
-
"epoch": 0.0037494949494949496,
|
439 |
-
"grad_norm": 2.4601149559020996,
|
440 |
-
"learning_rate": 6.709677419354839e-05,
|
441 |
-
"loss": 0.5411,
|
442 |
-
"step": 58
|
443 |
-
},
|
444 |
-
{
|
445 |
-
"epoch": 0.003814141414141414,
|
446 |
-
"grad_norm": 2.3120172023773193,
|
447 |
-
"learning_rate": 6.838709677419355e-05,
|
448 |
-
"loss": 0.4993,
|
449 |
-
"step": 59
|
450 |
-
},
|
451 |
-
{
|
452 |
-
"epoch": 0.0038787878787878787,
|
453 |
-
"grad_norm": 1.8615421056747437,
|
454 |
-
"learning_rate": 6.967741935483871e-05,
|
455 |
-
"loss": 0.4766,
|
456 |
-
"step": 60
|
457 |
-
},
|
458 |
-
{
|
459 |
-
"epoch": 0.0039434343434343435,
|
460 |
-
"grad_norm": 1.5349754095077515,
|
461 |
-
"learning_rate": 7.096774193548388e-05,
|
462 |
-
"loss": 0.4203,
|
463 |
-
"step": 61
|
464 |
-
},
|
465 |
-
{
|
466 |
-
"epoch": 0.004008080808080808,
|
467 |
-
"grad_norm": 1.7214694023132324,
|
468 |
-
"learning_rate": 7.225806451612904e-05,
|
469 |
-
"loss": 0.4294,
|
470 |
-
"step": 62
|
471 |
-
},
|
472 |
-
{
|
473 |
-
"epoch": 0.004072727272727273,
|
474 |
-
"grad_norm": 1.5352543592453003,
|
475 |
-
"learning_rate": 7.35483870967742e-05,
|
476 |
-
"loss": 0.3648,
|
477 |
-
"step": 63
|
478 |
-
},
|
479 |
-
{
|
480 |
-
"epoch": 0.004137373737373737,
|
481 |
-
"grad_norm": 0.8122034072875977,
|
482 |
-
"learning_rate": 7.483870967741936e-05,
|
483 |
-
"loss": 0.3224,
|
484 |
-
"step": 64
|
485 |
-
},
|
486 |
-
{
|
487 |
-
"epoch": 0.004137373737373737,
|
488 |
-
"eval_bleu": 0.0,
|
489 |
-
"eval_loss": 0.24285614490509033,
|
490 |
-
"eval_runtime": 1.3711,
|
491 |
-
"eval_samples_per_second": 11.67,
|
492 |
-
"eval_steps_per_second": 2.917,
|
493 |
-
"step": 64
|
494 |
-
},
|
495 |
-
{
|
496 |
-
"epoch": 0.004202020202020202,
|
497 |
-
"grad_norm": 0.8955532312393188,
|
498 |
-
"learning_rate": 7.612903225806451e-05,
|
499 |
-
"loss": 0.347,
|
500 |
-
"step": 65
|
501 |
-
},
|
502 |
-
{
|
503 |
-
"epoch": 0.004266666666666667,
|
504 |
-
"grad_norm": 0.5511079430580139,
|
505 |
-
"learning_rate": 7.741935483870968e-05,
|
506 |
-
"loss": 0.3468,
|
507 |
-
"step": 66
|
508 |
-
},
|
509 |
-
{
|
510 |
-
"epoch": 0.004331313131313131,
|
511 |
-
"grad_norm": 0.8366074562072754,
|
512 |
-
"learning_rate": 7.870967741935484e-05,
|
513 |
-
"loss": 0.2938,
|
514 |
-
"step": 67
|
515 |
-
},
|
516 |
-
{
|
517 |
-
"epoch": 0.0043959595959595955,
|
518 |
-
"grad_norm": 0.43903565406799316,
|
519 |
-
"learning_rate": 8e-05,
|
520 |
-
"loss": 0.2681,
|
521 |
-
"step": 68
|
522 |
-
},
|
523 |
-
{
|
524 |
-
"epoch": 0.004460606060606061,
|
525 |
-
"grad_norm": 0.5412452816963196,
|
526 |
-
"learning_rate": 8.129032258064517e-05,
|
527 |
-
"loss": 0.2725,
|
528 |
-
"step": 69
|
529 |
-
},
|
530 |
-
{
|
531 |
-
"epoch": 0.004525252525252525,
|
532 |
-
"grad_norm": 0.4017622470855713,
|
533 |
-
"learning_rate": 8.258064516129033e-05,
|
534 |
-
"loss": 0.2347,
|
535 |
-
"step": 70
|
536 |
-
},
|
537 |
-
{
|
538 |
-
"epoch": 0.00458989898989899,
|
539 |
-
"grad_norm": 22.646089553833008,
|
540 |
-
"learning_rate": 8.387096774193549e-05,
|
541 |
-
"loss": 0.2836,
|
542 |
-
"step": 71
|
543 |
-
},
|
544 |
-
{
|
545 |
-
"epoch": 0.004654545454545455,
|
546 |
-
"grad_norm": 0.3233143091201782,
|
547 |
-
"learning_rate": 8.516129032258064e-05,
|
548 |
-
"loss": 0.2306,
|
549 |
-
"step": 72
|
550 |
-
},
|
551 |
-
{
|
552 |
-
"epoch": 0.004719191919191919,
|
553 |
-
"grad_norm": 0.3327657878398895,
|
554 |
-
"learning_rate": 8.645161290322581e-05,
|
555 |
-
"loss": 0.2516,
|
556 |
-
"step": 73
|
557 |
-
},
|
558 |
-
{
|
559 |
-
"epoch": 0.004783838383838384,
|
560 |
-
"grad_norm": 0.32695600390434265,
|
561 |
-
"learning_rate": 8.774193548387098e-05,
|
562 |
-
"loss": 0.2525,
|
563 |
-
"step": 74
|
564 |
-
},
|
565 |
-
{
|
566 |
-
"epoch": 0.0048484848484848485,
|
567 |
-
"grad_norm": 0.2796344459056854,
|
568 |
-
"learning_rate": 8.903225806451614e-05,
|
569 |
-
"loss": 0.2679,
|
570 |
-
"step": 75
|
571 |
-
},
|
572 |
-
{
|
573 |
-
"epoch": 0.004913131313131313,
|
574 |
-
"grad_norm": 0.26281794905662537,
|
575 |
-
"learning_rate": 9.032258064516129e-05,
|
576 |
-
"loss": 0.2468,
|
577 |
-
"step": 76
|
578 |
-
},
|
579 |
-
{
|
580 |
-
"epoch": 0.004977777777777778,
|
581 |
-
"grad_norm": 0.2546544075012207,
|
582 |
-
"learning_rate": 9.161290322580646e-05,
|
583 |
-
"loss": 0.237,
|
584 |
-
"step": 77
|
585 |
-
},
|
586 |
-
{
|
587 |
-
"epoch": 0.005042424242424242,
|
588 |
-
"grad_norm": 0.24231921136379242,
|
589 |
-
"learning_rate": 9.290322580645162e-05,
|
590 |
-
"loss": 0.2529,
|
591 |
-
"step": 78
|
592 |
-
},
|
593 |
-
{
|
594 |
-
"epoch": 0.005107070707070707,
|
595 |
-
"grad_norm": 0.24682262539863586,
|
596 |
-
"learning_rate": 9.419354838709677e-05,
|
597 |
-
"loss": 0.2292,
|
598 |
-
"step": 79
|
599 |
-
},
|
600 |
-
{
|
601 |
-
"epoch": 0.005171717171717172,
|
602 |
-
"grad_norm": 0.30323857069015503,
|
603 |
-
"learning_rate": 9.548387096774195e-05,
|
604 |
-
"loss": 0.2191,
|
605 |
-
"step": 80
|
606 |
-
},
|
607 |
-
{
|
608 |
-
"epoch": 0.005171717171717172,
|
609 |
-
"eval_bleu": 0.0,
|
610 |
-
"eval_loss": 0.19482067227363586,
|
611 |
-
"eval_runtime": 1.3813,
|
612 |
-
"eval_samples_per_second": 11.584,
|
613 |
-
"eval_steps_per_second": 2.896,
|
614 |
-
"step": 80
|
615 |
-
},
|
616 |
-
{
|
617 |
-
"epoch": 0.005236363636363636,
|
618 |
-
"grad_norm": 0.21761713922023773,
|
619 |
-
"learning_rate": 9.677419354838711e-05,
|
620 |
-
"loss": 0.2414,
|
621 |
-
"step": 81
|
622 |
-
},
|
623 |
-
{
|
624 |
-
"epoch": 0.005301010101010101,
|
625 |
-
"grad_norm": 0.23040293157100677,
|
626 |
-
"learning_rate": 9.806451612903226e-05,
|
627 |
-
"loss": 0.2274,
|
628 |
-
"step": 82
|
629 |
-
},
|
630 |
-
{
|
631 |
-
"epoch": 0.005365656565656566,
|
632 |
-
"grad_norm": 0.23759864270687103,
|
633 |
-
"learning_rate": 9.935483870967742e-05,
|
634 |
-
"loss": 0.2126,
|
635 |
-
"step": 83
|
636 |
-
},
|
637 |
-
{
|
638 |
-
"epoch": 0.00543030303030303,
|
639 |
-
"grad_norm": 0.19173021614551544,
|
640 |
-
"learning_rate": 0.00010064516129032258,
|
641 |
-
"loss": 0.2219,
|
642 |
-
"step": 84
|
643 |
-
},
|
644 |
-
{
|
645 |
-
"epoch": 0.005494949494949495,
|
646 |
-
"grad_norm": 0.19772595167160034,
|
647 |
-
"learning_rate": 0.00010193548387096774,
|
648 |
-
"loss": 0.2046,
|
649 |
-
"step": 85
|
650 |
-
},
|
651 |
-
{
|
652 |
-
"epoch": 0.00555959595959596,
|
653 |
-
"grad_norm": 0.19209372997283936,
|
654 |
-
"learning_rate": 0.0001032258064516129,
|
655 |
-
"loss": 0.2202,
|
656 |
-
"step": 86
|
657 |
-
},
|
658 |
-
{
|
659 |
-
"epoch": 0.005624242424242424,
|
660 |
-
"grad_norm": 0.18713383376598358,
|
661 |
-
"learning_rate": 0.00010451612903225806,
|
662 |
-
"loss": 0.2279,
|
663 |
-
"step": 87
|
664 |
-
},
|
665 |
-
{
|
666 |
-
"epoch": 0.005688888888888889,
|
667 |
-
"grad_norm": 0.20889417827129364,
|
668 |
-
"learning_rate": 0.00010580645161290324,
|
669 |
-
"loss": 0.1787,
|
670 |
-
"step": 88
|
671 |
-
},
|
672 |
-
{
|
673 |
-
"epoch": 0.0057535353535353535,
|
674 |
-
"grad_norm": 0.17321723699569702,
|
675 |
-
"learning_rate": 0.0001070967741935484,
|
676 |
-
"loss": 0.2047,
|
677 |
-
"step": 89
|
678 |
-
},
|
679 |
-
{
|
680 |
-
"epoch": 0.005818181818181818,
|
681 |
-
"grad_norm": 0.25413277745246887,
|
682 |
-
"learning_rate": 0.00010838709677419356,
|
683 |
-
"loss": 0.2226,
|
684 |
-
"step": 90
|
685 |
-
},
|
686 |
-
{
|
687 |
-
"epoch": 0.005882828282828283,
|
688 |
-
"grad_norm": 0.1873357743024826,
|
689 |
-
"learning_rate": 0.00010967741935483871,
|
690 |
-
"loss": 0.1974,
|
691 |
-
"step": 91
|
692 |
-
},
|
693 |
-
{
|
694 |
-
"epoch": 0.005947474747474747,
|
695 |
-
"grad_norm": 0.14157669246196747,
|
696 |
-
"learning_rate": 0.00011096774193548387,
|
697 |
-
"loss": 0.1757,
|
698 |
-
"step": 92
|
699 |
-
},
|
700 |
-
{
|
701 |
-
"epoch": 0.006012121212121212,
|
702 |
-
"grad_norm": 0.18100616335868835,
|
703 |
-
"learning_rate": 0.00011225806451612903,
|
704 |
-
"loss": 0.193,
|
705 |
-
"step": 93
|
706 |
-
},
|
707 |
-
{
|
708 |
-
"epoch": 0.006076767676767677,
|
709 |
-
"grad_norm": 0.17187540233135223,
|
710 |
-
"learning_rate": 0.0001135483870967742,
|
711 |
-
"loss": 0.1996,
|
712 |
-
"step": 94
|
713 |
-
},
|
714 |
-
{
|
715 |
-
"epoch": 0.006141414141414141,
|
716 |
-
"grad_norm": 0.25643497705459595,
|
717 |
-
"learning_rate": 0.00011483870967741937,
|
718 |
-
"loss": 0.1944,
|
719 |
-
"step": 95
|
720 |
-
},
|
721 |
-
{
|
722 |
-
"epoch": 0.0062060606060606064,
|
723 |
-
"grad_norm": 0.3475594222545624,
|
724 |
-
"learning_rate": 0.00011612903225806453,
|
725 |
-
"loss": 0.1821,
|
726 |
-
"step": 96
|
727 |
-
},
|
728 |
-
{
|
729 |
-
"epoch": 0.0062060606060606064,
|
730 |
-
"eval_bleu": 0.0,
|
731 |
-
"eval_loss": 0.15860861539840698,
|
732 |
-
"eval_runtime": 1.3903,
|
733 |
-
"eval_samples_per_second": 11.508,
|
734 |
-
"eval_steps_per_second": 2.877,
|
735 |
-
"step": 96
|
736 |
-
},
|
737 |
-
{
|
738 |
-
"epoch": 0.006270707070707071,
|
739 |
-
"grad_norm": 0.17301534116268158,
|
740 |
-
"learning_rate": 0.00011741935483870967,
|
741 |
-
"loss": 0.1964,
|
742 |
-
"step": 97
|
743 |
-
},
|
744 |
-
{
|
745 |
-
"epoch": 0.006335353535353535,
|
746 |
-
"grad_norm": 0.13940928876399994,
|
747 |
-
"learning_rate": 0.00011870967741935484,
|
748 |
-
"loss": 0.1702,
|
749 |
-
"step": 98
|
750 |
-
},
|
751 |
-
{
|
752 |
-
"epoch": 0.0064,
|
753 |
-
"grad_norm": 0.1584329903125763,
|
754 |
-
"learning_rate": 0.00012,
|
755 |
-
"loss": 0.1667,
|
756 |
-
"step": 99
|
757 |
-
},
|
758 |
-
{
|
759 |
-
"epoch": 0.006464646464646465,
|
760 |
-
"grad_norm": 0.19155238568782806,
|
761 |
-
"learning_rate": 0.00012129032258064516,
|
762 |
-
"loss": 0.1941,
|
763 |
-
"step": 100
|
764 |
-
},
|
765 |
-
{
|
766 |
-
"epoch": 0.006529292929292929,
|
767 |
-
"grad_norm": 0.1830209642648697,
|
768 |
-
"learning_rate": 0.00012258064516129034,
|
769 |
-
"loss": 0.1794,
|
770 |
-
"step": 101
|
771 |
-
},
|
772 |
-
{
|
773 |
-
"epoch": 0.006593939393939394,
|
774 |
-
"grad_norm": 0.9416115880012512,
|
775 |
-
"learning_rate": 0.0001238709677419355,
|
776 |
-
"loss": 0.1799,
|
777 |
-
"step": 102
|
778 |
-
},
|
779 |
-
{
|
780 |
-
"epoch": 0.0066585858585858585,
|
781 |
-
"grad_norm": 0.13209928572177887,
|
782 |
-
"learning_rate": 0.00012516129032258066,
|
783 |
-
"loss": 0.1715,
|
784 |
-
"step": 103
|
785 |
-
},
|
786 |
-
{
|
787 |
-
"epoch": 0.006723232323232323,
|
788 |
-
"grad_norm": 0.15749603509902954,
|
789 |
-
"learning_rate": 0.0001264516129032258,
|
790 |
-
"loss": 0.1809,
|
791 |
-
"step": 104
|
792 |
-
},
|
793 |
-
{
|
794 |
-
"epoch": 0.006787878787878788,
|
795 |
-
"grad_norm": 0.1440904438495636,
|
796 |
-
"learning_rate": 0.00012774193548387096,
|
797 |
-
"loss": 0.1883,
|
798 |
-
"step": 105
|
799 |
-
},
|
800 |
-
{
|
801 |
-
"epoch": 0.006852525252525252,
|
802 |
-
"grad_norm": 0.13838624954223633,
|
803 |
-
"learning_rate": 0.00012903225806451613,
|
804 |
-
"loss": 0.1835,
|
805 |
-
"step": 106
|
806 |
-
},
|
807 |
-
{
|
808 |
-
"epoch": 0.006917171717171718,
|
809 |
-
"grad_norm": 0.13904741406440735,
|
810 |
-
"learning_rate": 0.0001303225806451613,
|
811 |
-
"loss": 0.1723,
|
812 |
-
"step": 107
|
813 |
-
},
|
814 |
-
{
|
815 |
-
"epoch": 0.006981818181818182,
|
816 |
-
"grad_norm": 0.18367218971252441,
|
817 |
-
"learning_rate": 0.00013161290322580646,
|
818 |
-
"loss": 0.1888,
|
819 |
-
"step": 108
|
820 |
-
},
|
821 |
-
{
|
822 |
-
"epoch": 0.007046464646464646,
|
823 |
-
"grad_norm": 0.12487432360649109,
|
824 |
-
"learning_rate": 0.00013290322580645163,
|
825 |
-
"loss": 0.1821,
|
826 |
-
"step": 109
|
827 |
-
},
|
828 |
-
{
|
829 |
-
"epoch": 0.0071111111111111115,
|
830 |
-
"grad_norm": 0.13827675580978394,
|
831 |
-
"learning_rate": 0.00013419354838709678,
|
832 |
-
"loss": 0.1733,
|
833 |
-
"step": 110
|
834 |
-
},
|
835 |
-
{
|
836 |
-
"epoch": 0.007175757575757576,
|
837 |
-
"grad_norm": 0.12497523427009583,
|
838 |
-
"learning_rate": 0.00013548387096774193,
|
839 |
-
"loss": 0.1804,
|
840 |
-
"step": 111
|
841 |
-
},
|
842 |
-
{
|
843 |
-
"epoch": 0.00724040404040404,
|
844 |
-
"grad_norm": 0.11209689825773239,
|
845 |
-
"learning_rate": 0.0001367741935483871,
|
846 |
-
"loss": 0.1782,
|
847 |
-
"step": 112
|
848 |
-
},
|
849 |
-
{
|
850 |
-
"epoch": 0.00724040404040404,
|
851 |
-
"eval_bleu": 0.0,
|
852 |
-
"eval_loss": 0.14075997471809387,
|
853 |
-
"eval_runtime": 1.3802,
|
854 |
-
"eval_samples_per_second": 11.593,
|
855 |
-
"eval_steps_per_second": 2.898,
|
856 |
-
"step": 112
|
857 |
-
},
|
858 |
-
{
|
859 |
-
"epoch": 0.007305050505050505,
|
860 |
-
"grad_norm": 0.1200064942240715,
|
861 |
-
"learning_rate": 0.00013806451612903225,
|
862 |
-
"loss": 0.1665,
|
863 |
-
"step": 113
|
864 |
-
},
|
865 |
-
{
|
866 |
-
"epoch": 0.00736969696969697,
|
867 |
-
"grad_norm": 0.2869766652584076,
|
868 |
-
"learning_rate": 0.00013935483870967743,
|
869 |
-
"loss": 0.164,
|
870 |
-
"step": 114
|
871 |
-
},
|
872 |
-
{
|
873 |
-
"epoch": 0.007434343434343434,
|
874 |
-
"grad_norm": 0.12517796456813812,
|
875 |
-
"learning_rate": 0.0001406451612903226,
|
876 |
-
"loss": 0.1584,
|
877 |
-
"step": 115
|
878 |
-
},
|
879 |
-
{
|
880 |
-
"epoch": 0.007498989898989899,
|
881 |
-
"grad_norm": 0.11323254555463791,
|
882 |
-
"learning_rate": 0.00014193548387096775,
|
883 |
-
"loss": 0.1593,
|
884 |
-
"step": 116
|
885 |
-
},
|
886 |
-
{
|
887 |
-
"epoch": 0.0075636363636363635,
|
888 |
-
"grad_norm": 0.23315002024173737,
|
889 |
-
"learning_rate": 0.00014322580645161293,
|
890 |
-
"loss": 0.1725,
|
891 |
-
"step": 117
|
892 |
-
},
|
893 |
-
{
|
894 |
-
"epoch": 0.007628282828282828,
|
895 |
-
"grad_norm": 0.11487537622451782,
|
896 |
-
"learning_rate": 0.00014451612903225807,
|
897 |
-
"loss": 0.1554,
|
898 |
-
"step": 118
|
899 |
-
},
|
900 |
-
{
|
901 |
-
"epoch": 0.007692929292929293,
|
902 |
-
"grad_norm": 0.11991633474826813,
|
903 |
-
"learning_rate": 0.00014580645161290322,
|
904 |
-
"loss": 0.1525,
|
905 |
-
"step": 119
|
906 |
-
},
|
907 |
-
{
|
908 |
-
"epoch": 0.007757575757575757,
|
909 |
-
"grad_norm": 0.17173829674720764,
|
910 |
-
"learning_rate": 0.0001470967741935484,
|
911 |
-
"loss": 0.1961,
|
912 |
-
"step": 120
|
913 |
-
},
|
914 |
-
{
|
915 |
-
"epoch": 0.007822222222222222,
|
916 |
-
"grad_norm": 0.22421815991401672,
|
917 |
-
"learning_rate": 0.00014838709677419355,
|
918 |
-
"loss": 0.1491,
|
919 |
-
"step": 121
|
920 |
-
},
|
921 |
-
{
|
922 |
-
"epoch": 0.007886868686868687,
|
923 |
-
"grad_norm": 0.10590796172618866,
|
924 |
-
"learning_rate": 0.00014967741935483872,
|
925 |
-
"loss": 0.1466,
|
926 |
-
"step": 122
|
927 |
-
},
|
928 |
-
{
|
929 |
-
"epoch": 0.007951515151515152,
|
930 |
-
"grad_norm": 0.15446847677230835,
|
931 |
-
"learning_rate": 0.0001509677419354839,
|
932 |
-
"loss": 0.1506,
|
933 |
-
"step": 123
|
934 |
-
},
|
935 |
-
{
|
936 |
-
"epoch": 0.008016161616161616,
|
937 |
-
"grad_norm": 0.1181936264038086,
|
938 |
-
"learning_rate": 0.00015225806451612902,
|
939 |
-
"loss": 0.1483,
|
940 |
-
"step": 124
|
941 |
-
},
|
942 |
-
{
|
943 |
-
"epoch": 0.00808080808080808,
|
944 |
-
"grad_norm": 0.12403552234172821,
|
945 |
-
"learning_rate": 0.0001535483870967742,
|
946 |
-
"loss": 0.1602,
|
947 |
-
"step": 125
|
948 |
-
},
|
949 |
-
{
|
950 |
-
"epoch": 0.008145454545454546,
|
951 |
-
"grad_norm": 0.12927326560020447,
|
952 |
-
"learning_rate": 0.00015483870967741937,
|
953 |
-
"loss": 0.173,
|
954 |
-
"step": 126
|
955 |
-
},
|
956 |
-
{
|
957 |
-
"epoch": 0.00821010101010101,
|
958 |
-
"grad_norm": 0.7120084762573242,
|
959 |
-
"learning_rate": 0.00015612903225806451,
|
960 |
-
"loss": 0.1561,
|
961 |
-
"step": 127
|
962 |
-
},
|
963 |
-
{
|
964 |
-
"epoch": 0.008274747474747475,
|
965 |
-
"grad_norm": 0.09934462606906891,
|
966 |
-
"learning_rate": 0.0001574193548387097,
|
967 |
-
"loss": 0.1433,
|
968 |
-
"step": 128
|
969 |
-
},
|
970 |
-
{
|
971 |
-
"epoch": 0.008274747474747475,
|
972 |
-
"eval_bleu": 0.0,
|
973 |
-
"eval_loss": 0.12477699667215347,
|
974 |
-
"eval_runtime": 1.3776,
|
975 |
-
"eval_samples_per_second": 11.614,
|
976 |
-
"eval_steps_per_second": 2.904,
|
977 |
-
"step": 128
|
978 |
-
},
|
979 |
-
{
|
980 |
-
"epoch": 0.00833939393939394,
|
981 |
-
"grad_norm": 0.11033691465854645,
|
982 |
-
"learning_rate": 0.00015870967741935487,
|
983 |
-
"loss": 0.1615,
|
984 |
-
"step": 129
|
985 |
-
},
|
986 |
-
{
|
987 |
-
"epoch": 0.008404040404040403,
|
988 |
-
"grad_norm": 0.12420456856489182,
|
989 |
-
"learning_rate": 0.00016,
|
990 |
-
"loss": 0.1477,
|
991 |
-
"step": 130
|
992 |
-
},
|
993 |
-
{
|
994 |
-
"epoch": 0.008468686868686869,
|
995 |
-
"grad_norm": 0.1626136600971222,
|
996 |
-
"learning_rate": 0.00016129032258064516,
|
997 |
-
"loss": 0.1624,
|
998 |
-
"step": 131
|
999 |
-
},
|
1000 |
-
{
|
1001 |
-
"epoch": 0.008533333333333334,
|
1002 |
-
"grad_norm": 0.1007506251335144,
|
1003 |
-
"learning_rate": 0.00016258064516129034,
|
1004 |
-
"loss": 0.1499,
|
1005 |
-
"step": 132
|
1006 |
-
},
|
1007 |
-
{
|
1008 |
-
"epoch": 0.008597979797979797,
|
1009 |
-
"grad_norm": 0.35909000039100647,
|
1010 |
-
"learning_rate": 0.00016387096774193548,
|
1011 |
-
"loss": 0.1646,
|
1012 |
-
"step": 133
|
1013 |
-
},
|
1014 |
-
{
|
1015 |
-
"epoch": 0.008662626262626262,
|
1016 |
-
"grad_norm": 0.139847531914711,
|
1017 |
-
"learning_rate": 0.00016516129032258066,
|
1018 |
-
"loss": 0.1531,
|
1019 |
-
"step": 134
|
1020 |
-
},
|
1021 |
-
{
|
1022 |
-
"epoch": 0.008727272727272728,
|
1023 |
-
"grad_norm": 0.14874647557735443,
|
1024 |
-
"learning_rate": 0.0001664516129032258,
|
1025 |
-
"loss": 0.1518,
|
1026 |
-
"step": 135
|
1027 |
-
},
|
1028 |
-
{
|
1029 |
-
"epoch": 0.008791919191919191,
|
1030 |
-
"grad_norm": 0.0868111178278923,
|
1031 |
-
"learning_rate": 0.00016774193548387098,
|
1032 |
-
"loss": 0.1429,
|
1033 |
-
"step": 136
|
1034 |
-
},
|
1035 |
-
{
|
1036 |
-
"epoch": 0.008856565656565656,
|
1037 |
-
"grad_norm": 0.13478216528892517,
|
1038 |
-
"learning_rate": 0.00016903225806451616,
|
1039 |
-
"loss": 0.1591,
|
1040 |
-
"step": 137
|
1041 |
-
},
|
1042 |
-
{
|
1043 |
-
"epoch": 0.008921212121212121,
|
1044 |
-
"grad_norm": 0.27142342925071716,
|
1045 |
-
"learning_rate": 0.00017032258064516128,
|
1046 |
-
"loss": 0.1421,
|
1047 |
-
"step": 138
|
1048 |
-
},
|
1049 |
-
{
|
1050 |
-
"epoch": 0.008985858585858587,
|
1051 |
-
"grad_norm": 0.10351862758398056,
|
1052 |
-
"learning_rate": 0.00017161290322580645,
|
1053 |
-
"loss": 0.1314,
|
1054 |
-
"step": 139
|
1055 |
-
},
|
1056 |
-
{
|
1057 |
-
"epoch": 0.00905050505050505,
|
1058 |
-
"grad_norm": 0.2667485177516937,
|
1059 |
-
"learning_rate": 0.00017290322580645163,
|
1060 |
-
"loss": 0.1383,
|
1061 |
-
"step": 140
|
1062 |
-
},
|
1063 |
-
{
|
1064 |
-
"epoch": 0.009115151515151515,
|
1065 |
-
"grad_norm": 0.12030935287475586,
|
1066 |
-
"learning_rate": 0.00017419354838709678,
|
1067 |
-
"loss": 0.1532,
|
1068 |
-
"step": 141
|
1069 |
-
},
|
1070 |
-
{
|
1071 |
-
"epoch": 0.00917979797979798,
|
1072 |
-
"grad_norm": 0.11514189839363098,
|
1073 |
-
"learning_rate": 0.00017548387096774195,
|
1074 |
-
"loss": 0.1478,
|
1075 |
-
"step": 142
|
1076 |
-
},
|
1077 |
-
{
|
1078 |
-
"epoch": 0.009244444444444444,
|
1079 |
-
"grad_norm": 0.09005405008792877,
|
1080 |
-
"learning_rate": 0.0001767741935483871,
|
1081 |
-
"loss": 0.1323,
|
1082 |
-
"step": 143
|
1083 |
-
},
|
1084 |
-
{
|
1085 |
-
"epoch": 0.00930909090909091,
|
1086 |
-
"grad_norm": 0.09564518928527832,
|
1087 |
-
"learning_rate": 0.00017806451612903228,
|
1088 |
-
"loss": 0.1591,
|
1089 |
-
"step": 144
|
1090 |
-
},
|
1091 |
-
{
|
1092 |
-
"epoch": 0.00930909090909091,
|
1093 |
-
"eval_bleu": 0.0,
|
1094 |
-
"eval_loss": 0.11460547149181366,
|
1095 |
-
"eval_runtime": 1.359,
|
1096 |
-
"eval_samples_per_second": 11.773,
|
1097 |
-
"eval_steps_per_second": 2.943,
|
1098 |
-
"step": 144
|
1099 |
-
},
|
1100 |
-
{
|
1101 |
-
"epoch": 0.009373737373737374,
|
1102 |
-
"grad_norm": 0.0995207279920578,
|
1103 |
-
"learning_rate": 0.00017935483870967742,
|
1104 |
-
"loss": 0.1577,
|
1105 |
-
"step": 145
|
1106 |
-
},
|
1107 |
-
{
|
1108 |
-
"epoch": 0.009438383838383838,
|
1109 |
-
"grad_norm": 0.4307728707790375,
|
1110 |
-
"learning_rate": 0.00018064516129032257,
|
1111 |
-
"loss": 0.1381,
|
1112 |
-
"step": 146
|
1113 |
-
},
|
1114 |
-
{
|
1115 |
-
"epoch": 0.009503030303030303,
|
1116 |
-
"grad_norm": 0.10841380804777145,
|
1117 |
-
"learning_rate": 0.00018193548387096775,
|
1118 |
-
"loss": 0.1695,
|
1119 |
-
"step": 147
|
1120 |
-
},
|
1121 |
-
{
|
1122 |
-
"epoch": 0.009567676767676768,
|
1123 |
-
"grad_norm": 0.08941018581390381,
|
1124 |
-
"learning_rate": 0.00018322580645161292,
|
1125 |
-
"loss": 0.1407,
|
1126 |
-
"step": 148
|
1127 |
-
},
|
1128 |
-
{
|
1129 |
-
"epoch": 0.009632323232323232,
|
1130 |
-
"grad_norm": 0.09527455270290375,
|
1131 |
-
"learning_rate": 0.00018451612903225807,
|
1132 |
-
"loss": 0.1515,
|
1133 |
-
"step": 149
|
1134 |
-
},
|
1135 |
-
{
|
1136 |
-
"epoch": 0.009696969696969697,
|
1137 |
-
"grad_norm": 0.07641109079122543,
|
1138 |
-
"learning_rate": 0.00018580645161290325,
|
1139 |
-
"loss": 0.1433,
|
1140 |
-
"step": 150
|
1141 |
-
},
|
1142 |
-
{
|
1143 |
-
"epoch": 0.009761616161616162,
|
1144 |
-
"grad_norm": 0.1487646847963333,
|
1145 |
-
"learning_rate": 0.0001870967741935484,
|
1146 |
-
"loss": 0.1474,
|
1147 |
-
"step": 151
|
1148 |
-
},
|
1149 |
-
{
|
1150 |
-
"epoch": 0.009826262626262626,
|
1151 |
-
"grad_norm": 0.08308811485767365,
|
1152 |
-
"learning_rate": 0.00018838709677419354,
|
1153 |
-
"loss": 0.1323,
|
1154 |
-
"step": 152
|
1155 |
-
},
|
1156 |
-
{
|
1157 |
-
"epoch": 0.00989090909090909,
|
1158 |
-
"grad_norm": 0.06572406738996506,
|
1159 |
-
"learning_rate": 0.00018967741935483872,
|
1160 |
-
"loss": 0.1407,
|
1161 |
-
"step": 153
|
1162 |
-
},
|
1163 |
-
{
|
1164 |
-
"epoch": 0.009955555555555556,
|
1165 |
-
"grad_norm": 0.24972431361675262,
|
1166 |
-
"learning_rate": 0.0001909677419354839,
|
1167 |
-
"loss": 0.1385,
|
1168 |
-
"step": 154
|
1169 |
-
},
|
1170 |
-
{
|
1171 |
-
"epoch": 0.01002020202020202,
|
1172 |
-
"grad_norm": 0.07581052929162979,
|
1173 |
-
"learning_rate": 0.00019225806451612904,
|
1174 |
-
"loss": 0.1507,
|
1175 |
-
"step": 155
|
1176 |
-
},
|
1177 |
-
{
|
1178 |
-
"epoch": 0.010084848484848485,
|
1179 |
-
"grad_norm": 0.1980135142803192,
|
1180 |
-
"learning_rate": 0.00019354838709677422,
|
1181 |
-
"loss": 0.1419,
|
1182 |
-
"step": 156
|
1183 |
-
},
|
1184 |
-
{
|
1185 |
-
"epoch": 0.01014949494949495,
|
1186 |
-
"grad_norm": 0.06879571825265884,
|
1187 |
-
"learning_rate": 0.00019483870967741936,
|
1188 |
-
"loss": 0.1371,
|
1189 |
-
"step": 157
|
1190 |
-
},
|
1191 |
-
{
|
1192 |
-
"epoch": 0.010214141414141413,
|
1193 |
-
"grad_norm": 0.08168785274028778,
|
1194 |
-
"learning_rate": 0.0001961290322580645,
|
1195 |
-
"loss": 0.143,
|
1196 |
-
"step": 158
|
1197 |
-
},
|
1198 |
-
{
|
1199 |
-
"epoch": 0.010278787878787879,
|
1200 |
-
"grad_norm": 0.13444702327251434,
|
1201 |
-
"learning_rate": 0.00019741935483870969,
|
1202 |
-
"loss": 0.1458,
|
1203 |
-
"step": 159
|
1204 |
-
},
|
1205 |
-
{
|
1206 |
-
"epoch": 0.010343434343434344,
|
1207 |
-
"grad_norm": 0.09177995473146439,
|
1208 |
-
"learning_rate": 0.00019870967741935483,
|
1209 |
-
"loss": 0.1302,
|
1210 |
-
"step": 160
|
1211 |
-
},
|
1212 |
-
{
|
1213 |
-
"epoch": 0.010343434343434344,
|
1214 |
-
"eval_bleu": 0.1344542592045913,
|
1215 |
-
"eval_loss": 0.10595569759607315,
|
1216 |
-
"eval_runtime": 1.353,
|
1217 |
-
"eval_samples_per_second": 11.825,
|
1218 |
-
"eval_steps_per_second": 2.956,
|
1219 |
-
"step": 160
|
1220 |
-
},
|
1221 |
-
{
|
1222 |
-
"epoch": 0.010408080808080807,
|
1223 |
-
"grad_norm": 0.059201959520578384,
|
1224 |
-
"learning_rate": 0.0002,
|
1225 |
-
"loss": 0.1317,
|
1226 |
-
"step": 161
|
1227 |
-
},
|
1228 |
-
{
|
1229 |
-
"epoch": 0.010472727272727272,
|
1230 |
-
"grad_norm": 0.1885806769132614,
|
1231 |
-
"learning_rate": 0.00019999999789549876,
|
1232 |
-
"loss": 0.1319,
|
1233 |
-
"step": 162
|
1234 |
-
},
|
1235 |
-
{
|
1236 |
-
"epoch": 0.010537373737373738,
|
1237 |
-
"grad_norm": 0.06697044521570206,
|
1238 |
-
"learning_rate": 0.0001999999915819952,
|
1239 |
-
"loss": 0.1136,
|
1240 |
-
"step": 163
|
1241 |
-
},
|
1242 |
-
{
|
1243 |
-
"epoch": 0.010602020202020203,
|
1244 |
-
"grad_norm": 0.0689595639705658,
|
1245 |
-
"learning_rate": 0.00019999998105948953,
|
1246 |
-
"loss": 0.1189,
|
1247 |
-
"step": 164
|
1248 |
-
},
|
1249 |
-
{
|
1250 |
-
"epoch": 0.010666666666666666,
|
1251 |
-
"grad_norm": 0.07707302272319794,
|
1252 |
-
"learning_rate": 0.00019999996632798217,
|
1253 |
-
"loss": 0.1412,
|
1254 |
-
"step": 165
|
1255 |
-
},
|
1256 |
-
{
|
1257 |
-
"epoch": 0.010731313131313132,
|
1258 |
-
"grad_norm": 0.22306282818317413,
|
1259 |
-
"learning_rate": 0.00019999994738747378,
|
1260 |
-
"loss": 0.1657,
|
1261 |
-
"step": 166
|
1262 |
-
},
|
1263 |
-
{
|
1264 |
-
"epoch": 0.010795959595959597,
|
1265 |
-
"grad_norm": 0.09084911644458771,
|
1266 |
-
"learning_rate": 0.00019999992423796515,
|
1267 |
-
"loss": 0.126,
|
1268 |
-
"step": 167
|
1269 |
-
},
|
1270 |
-
{
|
1271 |
-
"epoch": 0.01086060606060606,
|
1272 |
-
"grad_norm": 0.09681031852960587,
|
1273 |
-
"learning_rate": 0.00019999989687945728,
|
1274 |
-
"loss": 0.1303,
|
1275 |
-
"step": 168
|
1276 |
-
},
|
1277 |
-
{
|
1278 |
-
"epoch": 0.010925252525252525,
|
1279 |
-
"grad_norm": 0.12961797416210175,
|
1280 |
-
"learning_rate": 0.0001999998653119513,
|
1281 |
-
"loss": 0.1231,
|
1282 |
-
"step": 169
|
1283 |
-
},
|
1284 |
-
{
|
1285 |
-
"epoch": 0.01098989898989899,
|
1286 |
-
"grad_norm": 0.07255159318447113,
|
1287 |
-
"learning_rate": 0.00019999982953544852,
|
1288 |
-
"loss": 0.1324,
|
1289 |
-
"step": 170
|
1290 |
-
},
|
1291 |
-
{
|
1292 |
-
"epoch": 0.011054545454545454,
|
1293 |
-
"grad_norm": 0.07213090360164642,
|
1294 |
-
"learning_rate": 0.00019999978954995045,
|
1295 |
-
"loss": 0.1243,
|
1296 |
-
"step": 171
|
1297 |
-
},
|
1298 |
-
{
|
1299 |
-
"epoch": 0.01111919191919192,
|
1300 |
-
"grad_norm": 0.058742836117744446,
|
1301 |
-
"learning_rate": 0.0001999997453554588,
|
1302 |
-
"loss": 0.1196,
|
1303 |
-
"step": 172
|
1304 |
-
},
|
1305 |
-
{
|
1306 |
-
"epoch": 0.011183838383838384,
|
1307 |
-
"grad_norm": 0.06557495146989822,
|
1308 |
-
"learning_rate": 0.00019999969695197543,
|
1309 |
-
"loss": 0.1234,
|
1310 |
-
"step": 173
|
1311 |
-
},
|
1312 |
-
{
|
1313 |
-
"epoch": 0.011248484848484848,
|
1314 |
-
"grad_norm": 0.05962904542684555,
|
1315 |
-
"learning_rate": 0.00019999964433950235,
|
1316 |
-
"loss": 0.1465,
|
1317 |
-
"step": 174
|
1318 |
-
},
|
1319 |
-
{
|
1320 |
-
"epoch": 0.011313131313131313,
|
1321 |
-
"grad_norm": 0.11403318494558334,
|
1322 |
-
"learning_rate": 0.00019999958751804178,
|
1323 |
-
"loss": 0.1147,
|
1324 |
-
"step": 175
|
1325 |
-
},
|
1326 |
-
{
|
1327 |
-
"epoch": 0.011377777777777778,
|
1328 |
-
"grad_norm": 0.06283015757799149,
|
1329 |
-
"learning_rate": 0.0001999995264875961,
|
1330 |
-
"loss": 0.1051,
|
1331 |
-
"step": 176
|
1332 |
-
},
|
1333 |
-
{
|
1334 |
-
"epoch": 0.011377777777777778,
|
1335 |
-
"eval_bleu": 0.34108656655271324,
|
1336 |
-
"eval_loss": 0.10132479667663574,
|
1337 |
-
"eval_runtime": 1.4086,
|
1338 |
-
"eval_samples_per_second": 11.359,
|
1339 |
-
"eval_steps_per_second": 2.84,
|
1340 |
-
"step": 176
|
1341 |
-
},
|
1342 |
-
{
|
1343 |
-
"epoch": 0.011442424242424242,
|
1344 |
-
"grad_norm": 0.07057774811983109,
|
1345 |
-
"learning_rate": 0.00019999946124816794,
|
1346 |
-
"loss": 0.1159,
|
1347 |
-
"step": 177
|
1348 |
-
},
|
1349 |
-
{
|
1350 |
-
"epoch": 0.011507070707070707,
|
1351 |
-
"grad_norm": 0.1344994753599167,
|
1352 |
-
"learning_rate": 0.00019999939179975997,
|
1353 |
-
"loss": 0.1123,
|
1354 |
-
"step": 178
|
1355 |
-
},
|
1356 |
-
{
|
1357 |
-
"epoch": 0.011571717171717172,
|
1358 |
-
"grad_norm": 0.053567882627248764,
|
1359 |
-
"learning_rate": 0.00019999931814237515,
|
1360 |
-
"loss": 0.1319,
|
1361 |
-
"step": 179
|
1362 |
-
},
|
1363 |
-
{
|
1364 |
-
"epoch": 0.011636363636363636,
|
1365 |
-
"grad_norm": 0.05020461976528168,
|
1366 |
-
"learning_rate": 0.0001999992402760166,
|
1367 |
-
"loss": 0.1315,
|
1368 |
-
"step": 180
|
1369 |
-
},
|
1370 |
-
{
|
1371 |
-
"epoch": 0.0117010101010101,
|
1372 |
-
"grad_norm": 0.09403225779533386,
|
1373 |
-
"learning_rate": 0.00019999915820068757,
|
1374 |
-
"loss": 0.1275,
|
1375 |
-
"step": 181
|
1376 |
-
},
|
1377 |
-
{
|
1378 |
-
"epoch": 0.011765656565656566,
|
1379 |
-
"grad_norm": 0.07833687961101532,
|
1380 |
-
"learning_rate": 0.0001999990719163915,
|
1381 |
-
"loss": 0.1216,
|
1382 |
-
"step": 182
|
1383 |
-
},
|
1384 |
-
{
|
1385 |
-
"epoch": 0.01183030303030303,
|
1386 |
-
"grad_norm": 0.03991740942001343,
|
1387 |
-
"learning_rate": 0.00019999898142313206,
|
1388 |
-
"loss": 0.1142,
|
1389 |
-
"step": 183
|
1390 |
-
},
|
1391 |
-
{
|
1392 |
-
"epoch": 0.011894949494949495,
|
1393 |
-
"grad_norm": 0.04619375616312027,
|
1394 |
-
"learning_rate": 0.00019999888672091304,
|
1395 |
-
"loss": 0.1103,
|
1396 |
-
"step": 184
|
1397 |
-
},
|
1398 |
-
{
|
1399 |
-
"epoch": 0.01195959595959596,
|
1400 |
-
"grad_norm": 0.037316370755434036,
|
1401 |
-
"learning_rate": 0.0001999987878097384,
|
1402 |
-
"loss": 0.1168,
|
1403 |
-
"step": 185
|
1404 |
-
},
|
1405 |
-
{
|
1406 |
-
"epoch": 0.012024242424242423,
|
1407 |
-
"grad_norm": 0.04291122406721115,
|
1408 |
-
"learning_rate": 0.00019999868468961233,
|
1409 |
-
"loss": 0.1198,
|
1410 |
-
"step": 186
|
1411 |
-
},
|
1412 |
-
{
|
1413 |
-
"epoch": 0.012088888888888889,
|
1414 |
-
"grad_norm": 0.07236277312040329,
|
1415 |
-
"learning_rate": 0.00019999857736053918,
|
1416 |
-
"loss": 0.12,
|
1417 |
-
"step": 187
|
1418 |
-
},
|
1419 |
-
{
|
1420 |
-
"epoch": 0.012153535353535354,
|
1421 |
-
"grad_norm": 0.04166350141167641,
|
1422 |
-
"learning_rate": 0.0001999984658225235,
|
1423 |
-
"loss": 0.1312,
|
1424 |
-
"step": 188
|
1425 |
-
},
|
1426 |
-
{
|
1427 |
-
"epoch": 0.012218181818181819,
|
1428 |
-
"grad_norm": 0.04327237978577614,
|
1429 |
-
"learning_rate": 0.00019999835007556986,
|
1430 |
-
"loss": 0.138,
|
1431 |
-
"step": 189
|
1432 |
-
},
|
1433 |
-
{
|
1434 |
-
"epoch": 0.012282828282828282,
|
1435 |
-
"grad_norm": 0.13315382599830627,
|
1436 |
-
"learning_rate": 0.00019999823011968327,
|
1437 |
-
"loss": 0.1395,
|
1438 |
-
"step": 190
|
1439 |
-
},
|
1440 |
-
{
|
1441 |
-
"epoch": 0.012347474747474748,
|
1442 |
-
"grad_norm": 0.04486257955431938,
|
1443 |
-
"learning_rate": 0.0001999981059548687,
|
1444 |
-
"loss": 0.1281,
|
1445 |
-
"step": 191
|
1446 |
-
},
|
1447 |
-
{
|
1448 |
-
"epoch": 0.012412121212121213,
|
1449 |
-
"grad_norm": 0.06127722188830376,
|
1450 |
-
"learning_rate": 0.0001999979775811314,
|
1451 |
-
"loss": 0.1295,
|
1452 |
-
"step": 192
|
1453 |
-
},
|
1454 |
-
{
|
1455 |
-
"epoch": 0.012412121212121213,
|
1456 |
-
"eval_bleu": 2.797660142073947,
|
1457 |
-
"eval_loss": 0.09842301905155182,
|
1458 |
-
"eval_runtime": 1.3435,
|
1459 |
-
"eval_samples_per_second": 11.909,
|
1460 |
-
"eval_steps_per_second": 2.977,
|
1461 |
-
"step": 192
|
1462 |
-
},
|
1463 |
-
{
|
1464 |
-
"epoch": 0.012476767676767676,
|
1465 |
-
"grad_norm": 0.10934247821569443,
|
1466 |
-
"learning_rate": 0.00019999784499847678,
|
1467 |
-
"loss": 0.119,
|
1468 |
-
"step": 193
|
1469 |
-
},
|
1470 |
-
{
|
1471 |
-
"epoch": 0.012541414141414142,
|
1472 |
-
"grad_norm": 0.04266177862882614,
|
1473 |
-
"learning_rate": 0.0001999977082069104,
|
1474 |
-
"loss": 0.1094,
|
1475 |
-
"step": 194
|
1476 |
-
},
|
1477 |
-
{
|
1478 |
-
"epoch": 0.012606060606060607,
|
1479 |
-
"grad_norm": 0.10852430015802383,
|
1480 |
-
"learning_rate": 0.00019999756720643803,
|
1481 |
-
"loss": 0.1118,
|
1482 |
-
"step": 195
|
1483 |
-
},
|
1484 |
-
{
|
1485 |
-
"epoch": 0.01267070707070707,
|
1486 |
-
"grad_norm": 0.06190445274114609,
|
1487 |
-
"learning_rate": 0.0001999974219970656,
|
1488 |
-
"loss": 0.129,
|
1489 |
-
"step": 196
|
1490 |
-
},
|
1491 |
-
{
|
1492 |
-
"epoch": 0.012735353535353535,
|
1493 |
-
"grad_norm": 0.04268389567732811,
|
1494 |
-
"learning_rate": 0.00019999727257879923,
|
1495 |
-
"loss": 0.1149,
|
1496 |
-
"step": 197
|
1497 |
-
},
|
1498 |
-
{
|
1499 |
-
"epoch": 0.0128,
|
1500 |
-
"grad_norm": 0.04210319742560387,
|
1501 |
-
"learning_rate": 0.0001999971189516452,
|
1502 |
-
"loss": 0.1231,
|
1503 |
-
"step": 198
|
1504 |
-
},
|
1505 |
-
{
|
1506 |
-
"epoch": 0.012864646464646464,
|
1507 |
-
"grad_norm": 0.07176094502210617,
|
1508 |
-
"learning_rate": 0.00019999696111561,
|
1509 |
-
"loss": 0.1123,
|
1510 |
-
"step": 199
|
1511 |
-
},
|
1512 |
-
{
|
1513 |
-
"epoch": 0.01292929292929293,
|
1514 |
-
"grad_norm": 0.04062803462147713,
|
1515 |
-
"learning_rate": 0.00019999679907070023,
|
1516 |
-
"loss": 0.1225,
|
1517 |
-
"step": 200
|
1518 |
-
},
|
1519 |
-
{
|
1520 |
-
"epoch": 0.012993939393939394,
|
1521 |
-
"grad_norm": 0.04266968369483948,
|
1522 |
-
"learning_rate": 0.00019999663281692275,
|
1523 |
-
"loss": 0.1259,
|
1524 |
-
"step": 201
|
1525 |
-
},
|
1526 |
-
{
|
1527 |
-
"epoch": 0.013058585858585858,
|
1528 |
-
"grad_norm": 0.045373089611530304,
|
1529 |
-
"learning_rate": 0.00019999646235428452,
|
1530 |
-
"loss": 0.1353,
|
1531 |
-
"step": 202
|
1532 |
-
},
|
1533 |
-
{
|
1534 |
-
"epoch": 0.013123232323232323,
|
1535 |
-
"grad_norm": 0.04623784124851227,
|
1536 |
-
"learning_rate": 0.00019999628768279276,
|
1537 |
-
"loss": 0.1224,
|
1538 |
-
"step": 203
|
1539 |
-
},
|
1540 |
-
{
|
1541 |
-
"epoch": 0.013187878787878788,
|
1542 |
-
"grad_norm": 0.03664301335811615,
|
1543 |
-
"learning_rate": 0.0001999961088024548,
|
1544 |
-
"loss": 0.1361,
|
1545 |
-
"step": 204
|
1546 |
-
},
|
1547 |
-
{
|
1548 |
-
"epoch": 0.013252525252525252,
|
1549 |
-
"grad_norm": 0.03849755972623825,
|
1550 |
-
"learning_rate": 0.00019999592571327815,
|
1551 |
-
"loss": 0.1307,
|
1552 |
-
"step": 205
|
1553 |
-
},
|
1554 |
-
{
|
1555 |
-
"epoch": 0.013317171717171717,
|
1556 |
-
"grad_norm": 0.03995022922754288,
|
1557 |
-
"learning_rate": 0.00019999573841527054,
|
1558 |
-
"loss": 0.1079,
|
1559 |
-
"step": 206
|
1560 |
-
},
|
1561 |
-
{
|
1562 |
-
"epoch": 0.013381818181818182,
|
1563 |
-
"grad_norm": 0.039675675332546234,
|
1564 |
-
"learning_rate": 0.00019999554690843988,
|
1565 |
-
"loss": 0.1212,
|
1566 |
-
"step": 207
|
1567 |
-
},
|
1568 |
-
{
|
1569 |
-
"epoch": 0.013446464646464646,
|
1570 |
-
"grad_norm": 0.05080877244472504,
|
1571 |
-
"learning_rate": 0.00019999535119279415,
|
1572 |
-
"loss": 0.0991,
|
1573 |
-
"step": 208
|
1574 |
-
},
|
1575 |
-
{
|
1576 |
-
"epoch": 0.013446464646464646,
|
1577 |
-
"eval_bleu": 3.66705872401506,
|
1578 |
-
"eval_loss": 0.09652489423751831,
|
1579 |
-
"eval_runtime": 1.3632,
|
1580 |
-
"eval_samples_per_second": 11.737,
|
1581 |
-
"eval_steps_per_second": 2.934,
|
1582 |
-
"step": 208
|
1583 |
}
|
1584 |
],
|
1585 |
"logging_steps": 1,
|
1586 |
-
"max_steps":
|
1587 |
"num_input_tokens_seen": 0,
|
1588 |
-
"num_train_epochs":
|
1589 |
"save_steps": 16,
|
1590 |
"stateful_callbacks": {
|
1591 |
"TrainerControl": {
|
@@ -1599,8 +147,8 @@
|
|
1599 |
"attributes": {}
|
1600 |
}
|
1601 |
},
|
1602 |
-
"total_flos":
|
1603 |
-
"train_batch_size":
|
1604 |
"trial_name": null,
|
1605 |
"trial_params": null
|
1606 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.0010343434343434343,
|
5 |
"eval_steps": 16,
|
6 |
+
"global_step": 16,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
12 |
"epoch": 6.464646464646465e-05,
|
13 |
"grad_norm": NaN,
|
14 |
"learning_rate": 0.0,
|
15 |
+
"loss": 29.6213,
|
16 |
"step": 1
|
17 |
},
|
18 |
{
|
19 |
"epoch": 0.0001292929292929293,
|
20 |
"grad_norm": NaN,
|
21 |
"learning_rate": 0.0,
|
22 |
+
"loss": 29.6208,
|
23 |
"step": 2
|
24 |
},
|
25 |
{
|
26 |
"epoch": 0.00019393939393939395,
|
27 |
+
"grad_norm": 129.40235900878906,
|
28 |
+
"learning_rate": 4.3010752688172043e-07,
|
29 |
+
"loss": 29.5846,
|
30 |
"step": 3
|
31 |
},
|
32 |
{
|
33 |
"epoch": 0.0002585858585858586,
|
34 |
+
"grad_norm": Infinity,
|
35 |
+
"learning_rate": 4.3010752688172043e-07,
|
36 |
+
"loss": 29.7161,
|
37 |
"step": 4
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.00032323232323232324,
|
41 |
+
"grad_norm": 130.79031372070312,
|
42 |
+
"learning_rate": 8.602150537634409e-07,
|
43 |
+
"loss": 29.7196,
|
44 |
"step": 5
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.0003878787878787879,
|
48 |
+
"grad_norm": 123.62369537353516,
|
49 |
+
"learning_rate": 1.2903225806451614e-06,
|
50 |
+
"loss": 29.2487,
|
51 |
"step": 6
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.0004525252525252525,
|
55 |
+
"grad_norm": 135.1348876953125,
|
56 |
+
"learning_rate": 1.7204301075268817e-06,
|
57 |
+
"loss": 29.6055,
|
58 |
"step": 7
|
59 |
},
|
60 |
{
|
61 |
"epoch": 0.0005171717171717172,
|
62 |
+
"grad_norm": Infinity,
|
63 |
+
"learning_rate": 1.7204301075268817e-06,
|
64 |
+
"loss": 28.568,
|
65 |
"step": 8
|
66 |
},
|
67 |
{
|
68 |
"epoch": 0.0005818181818181818,
|
69 |
+
"grad_norm": 270.385498046875,
|
70 |
+
"learning_rate": 2.1505376344086023e-06,
|
71 |
+
"loss": 29.7127,
|
72 |
"step": 9
|
73 |
},
|
74 |
{
|
75 |
"epoch": 0.0006464646464646465,
|
76 |
+
"grad_norm": 109.7217788696289,
|
77 |
+
"learning_rate": 2.580645161290323e-06,
|
78 |
+
"loss": 29.2279,
|
79 |
"step": 10
|
80 |
},
|
81 |
{
|
82 |
"epoch": 0.0007111111111111111,
|
83 |
+
"grad_norm": 403.46337890625,
|
84 |
+
"learning_rate": 3.0107526881720433e-06,
|
85 |
+
"loss": 28.7925,
|
86 |
"step": 11
|
87 |
},
|
88 |
{
|
89 |
"epoch": 0.0007757575757575758,
|
90 |
+
"grad_norm": 114.81087493896484,
|
91 |
+
"learning_rate": 3.4408602150537635e-06,
|
92 |
+
"loss": 28.0663,
|
93 |
"step": 12
|
94 |
},
|
95 |
{
|
96 |
"epoch": 0.0008404040404040404,
|
97 |
+
"grad_norm": 240.21282958984375,
|
98 |
+
"learning_rate": 3.870967741935484e-06,
|
99 |
+
"loss": 27.3475,
|
100 |
"step": 13
|
101 |
},
|
102 |
{
|
103 |
"epoch": 0.000905050505050505,
|
104 |
+
"grad_norm": 236.48675537109375,
|
105 |
+
"learning_rate": 4.3010752688172045e-06,
|
106 |
+
"loss": 26.9302,
|
107 |
"step": 14
|
108 |
},
|
109 |
{
|
110 |
"epoch": 0.0009696969696969697,
|
111 |
+
"grad_norm": 203.8461456298828,
|
112 |
+
"learning_rate": 4.731182795698925e-06,
|
113 |
+
"loss": 24.0135,
|
114 |
"step": 15
|
115 |
},
|
116 |
{
|
117 |
"epoch": 0.0010343434343434343,
|
118 |
+
"grad_norm": 156.15663146972656,
|
119 |
+
"learning_rate": 5.161290322580646e-06,
|
120 |
+
"loss": 22.7445,
|
121 |
"step": 16
|
122 |
},
|
123 |
{
|
124 |
"epoch": 0.0010343434343434343,
|
125 |
+
"eval_bleu": 0.11919568898736486,
|
126 |
+
"eval_loss": 21.227251052856445,
|
127 |
+
"eval_runtime": 2.9098,
|
128 |
+
"eval_samples_per_second": 10.997,
|
129 |
+
"eval_steps_per_second": 1.375,
|
130 |
"step": 16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
}
|
132 |
],
|
133 |
"logging_steps": 1,
|
134 |
+
"max_steps": 46404,
|
135 |
"num_input_tokens_seen": 0,
|
136 |
+
"num_train_epochs": 3,
|
137 |
"save_steps": 16,
|
138 |
"stateful_callbacks": {
|
139 |
"TrainerControl": {
|
|
|
147 |
"attributes": {}
|
148 |
}
|
149 |
},
|
150 |
+
"total_flos": 311786439966720.0,
|
151 |
+
"train_batch_size": 8,
|
152 |
"trial_name": null,
|
153 |
"trial_params": null
|
154 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f3a0792a04fef5ea618963940e094b8c3947590c45a58f3ea50f2fde6e0f3e8
|
3 |
size 5240
|