WangXFng committed
Commit b4fa4b4
1 Parent(s): f76d53e

Model save
README.md ADDED
@@ -0,0 +1,56 @@
+ ---
+ base_model: meta-llama/Llama-2-7b-hf
+ library_name: peft
+ license: llama2
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: Instruments-8bit-8B-4Epoch
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Instruments-8bit-8B-4Epoch
+
+ This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 16
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 256
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 2
+ - num_epochs: 4
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - PEFT 0.13.0
+ - Transformers 4.45.2
+ - Pytorch 2.4.0
+ - Tokenizers 0.20.0
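The card above records a LoRA (PEFT) fine-tune of meta-llama/Llama-2-7b-hf with an extended tokenizer. A minimal loading sketch, assuming a local checkout of this repo; the `ADAPTER` path is hypothetical:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

ADAPTER = "./Instruments-8bit-8B-4Epoch"  # hypothetical local path to this repo

# The tokenizer in this commit carries 918 added tokens (see added_tokens.json).
tokenizer = AutoTokenizer.from_pretrained(ADAPTER)

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16
)
# Grow the embeddings to the extended vocabulary before attaching the adapter,
# so the checkpoint lines up with config.json's vocab_size of 32918.
base.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()
```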
adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "o_proj",
+     "q_proj",
+     "down_proj",
+     "up_proj",
+     "v_proj",
+     "k_proj",
+     "gate_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
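The same adapter configuration written out as a PEFT `LoraConfig`, for readability; a sketch only, with all fields not shown left at their PEFT 0.13 defaults:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,               # LoRA rank
    lora_alpha=32,     # effective scaling alpha / r = 4
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # Adapt every attention and MLP projection in each decoder block.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```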
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74b824b773ff9a426b5ebaaad8c4f64308d00fe2897db2575050ef68d0592497
+ size 619341864
added_tokens.json ADDED
@@ -0,0 +1,920 @@
+ {
+   "<a_0>": 32000,
+   "<a_100>": 32001,
+   "<a_102>": 32002,
+   "<a_103>": 32003,
+   "<a_106>": 32004,
+   "<a_109>": 32005,
+   "<a_110>": 32006,
+   "<a_111>": 32007,
+   "<a_115>": 32008,
+   "<a_117>": 32009,
+   "<a_118>": 32010,
+   "<a_119>": 32011,
+   "<a_11>": 32012,
+   "<a_120>": 32013,
+   "<a_121>": 32014,
+   "<a_122>": 32015,
+   "<a_123>": 32016,
+   "<a_124>": 32017,
+   "<a_126>": 32018,
+   "<a_127>": 32019,
+   "<a_128>": 32020,
+   "<a_129>": 32021,
+   "<a_12>": 32022,
+   "<a_130>": 32023,
+   "<a_134>": 32024,
+   "<a_135>": 32025,
+   "<a_136>": 32026,
+   "<a_138>": 32027,
+   "<a_139>": 32028,
+   "<a_140>": 32029,
+   "<a_141>": 32030,
+   "<a_142>": 32031,
+   "<a_143>": 32032,
+   "<a_144>": 32033,
+   "<a_145>": 32034,
+   "<a_146>": 32035,
+   "<a_149>": 32036,
+   "<a_150>": 32037,
+   "<a_153>": 32038,
+   "<a_157>": 32039,
+   "<a_160>": 32040,
+   "<a_162>": 32041,
+   "<a_163>": 32042,
+   "<a_166>": 32043,
+   "<a_167>": 32044,
+   "<a_16>": 32045,
+   "<a_171>": 32046,
+   "<a_172>": 32047,
+   "<a_174>": 32048,
+   "<a_175>": 32049,
+   "<a_177>": 32050,
+   "<a_178>": 32051,
+   "<a_179>": 32052,
+   "<a_17>": 32053,
+   "<a_181>": 32054,
+   "<a_183>": 32055,
+   "<a_184>": 32056,
+   "<a_185>": 32057,
+   "<a_187>": 32058,
+   "<a_188>": 32059,
+   "<a_189>": 32060,
+   "<a_18>": 32061,
+   "<a_190>": 32062,
+   "<a_192>": 32063,
+   "<a_193>": 32064,
+   "<a_194>": 32065,
+   "<a_195>": 32066,
+   "<a_196>": 32067,
+   "<a_19>": 32068,
+   "<a_202>": 32069,
+   "<a_206>": 32070,
+   "<a_207>": 32071,
+   "<a_208>": 32072,
+   "<a_20>": 32073,
+   "<a_210>": 32074,
+   "<a_212>": 32075,
+   "<a_213>": 32076,
+   "<a_217>": 32077,
+   "<a_218>": 32078,
+   "<a_219>": 32079,
+   "<a_221>": 32080,
+   "<a_223>": 32081,
+   "<a_225>": 32082,
+   "<a_229>": 32083,
+   "<a_22>": 32084,
+   "<a_230>": 32085,
+   "<a_233>": 32086,
+   "<a_234>": 32087,
+   "<a_235>": 32088,
+   "<a_236>": 32089,
+   "<a_237>": 32090,
+   "<a_238>": 32091,
+   "<a_241>": 32092,
+   "<a_242>": 32093,
+   "<a_243>": 32094,
+   "<a_244>": 32095,
+   "<a_245>": 32096,
+   "<a_246>": 32097,
+   "<a_250>": 32098,
+   "<a_251>": 32099,
+   "<a_253>": 32100,
+   "<a_254>": 32101,
+   "<a_255>": 32102,
+   "<a_25>": 32103,
+   "<a_26>": 32104,
+   "<a_27>": 32105,
+   "<a_28>": 32106,
+   "<a_29>": 32107,
+   "<a_30>": 32108,
+   "<a_31>": 32109,
+   "<a_32>": 32110,
+   "<a_34>": 32111,
+   "<a_35>": 32112,
+   "<a_36>": 32113,
+   "<a_38>": 32114,
+   "<a_43>": 32115,
+   "<a_44>": 32116,
+   "<a_46>": 32117,
+   "<a_47>": 32118,
+   "<a_49>": 32119,
+   "<a_51>": 32120,
+   "<a_54>": 32121,
+   "<a_55>": 32122,
+   "<a_56>": 32123,
+   "<a_57>": 32124,
+   "<a_58>": 32125,
+   "<a_5>": 32126,
+   "<a_60>": 32127,
+   "<a_64>": 32128,
+   "<a_65>": 32129,
+   "<a_67>": 32130,
+   "<a_68>": 32131,
+   "<a_69>": 32132,
+   "<a_6>": 32133,
+   "<a_71>": 32134,
+   "<a_72>": 32135,
+   "<a_77>": 32136,
+   "<a_78>": 32137,
+   "<a_79>": 32138,
+   "<a_80>": 32139,
+   "<a_83>": 32140,
+   "<a_84>": 32141,
+   "<a_86>": 32142,
+   "<a_87>": 32143,
+   "<a_88>": 32144,
+   "<a_8>": 32145,
+   "<a_90>": 32146,
+   "<a_92>": 32147,
+   "<a_96>": 32148,
+   "<a_98>": 32149,
+   "<b_0>": 32150,
+   "<b_100>": 32151,
+   "<b_101>": 32152,
+   "<b_102>": 32153,
+   "<b_103>": 32154,
+   "<b_104>": 32155,
+   "<b_105>": 32156,
+   "<b_106>": 32157,
+   "<b_107>": 32158,
+   "<b_108>": 32159,
+   "<b_109>": 32160,
+   "<b_10>": 32161,
+   "<b_110>": 32162,
+   "<b_111>": 32163,
+   "<b_112>": 32164,
+   "<b_113>": 32165,
+   "<b_114>": 32166,
+   "<b_115>": 32167,
+   "<b_116>": 32168,
+   "<b_117>": 32169,
+   "<b_118>": 32170,
+   "<b_119>": 32171,
+   "<b_11>": 32172,
+   "<b_120>": 32173,
+   "<b_121>": 32174,
+   "<b_122>": 32175,
+   "<b_123>": 32176,
+   "<b_124>": 32177,
+   "<b_125>": 32178,
+   "<b_126>": 32179,
+   "<b_127>": 32180,
+   "<b_128>": 32181,
+   "<b_129>": 32182,
+   "<b_12>": 32183,
+   "<b_130>": 32184,
+   "<b_131>": 32185,
+   "<b_132>": 32186,
+   "<b_133>": 32187,
+   "<b_134>": 32188,
+   "<b_135>": 32189,
+   "<b_136>": 32190,
+   "<b_137>": 32191,
+   "<b_138>": 32192,
+   "<b_139>": 32193,
+   "<b_13>": 32194,
+   "<b_140>": 32195,
+   "<b_141>": 32196,
+   "<b_142>": 32197,
+   "<b_143>": 32198,
+   "<b_144>": 32199,
+   "<b_145>": 32200,
+   "<b_146>": 32201,
+   "<b_147>": 32202,
+   "<b_148>": 32203,
+   "<b_149>": 32204,
+   "<b_14>": 32205,
+   "<b_150>": 32206,
+   "<b_151>": 32207,
+   "<b_152>": 32208,
+   "<b_153>": 32209,
+   "<b_154>": 32210,
+   "<b_155>": 32211,
+   "<b_156>": 32212,
+   "<b_157>": 32213,
+   "<b_158>": 32214,
+   "<b_159>": 32215,
+   "<b_15>": 32216,
+   "<b_160>": 32217,
+   "<b_161>": 32218,
+   "<b_162>": 32219,
+   "<b_163>": 32220,
+   "<b_164>": 32221,
+   "<b_165>": 32222,
+   "<b_166>": 32223,
+   "<b_167>": 32224,
+   "<b_168>": 32225,
+   "<b_169>": 32226,
+   "<b_16>": 32227,
+   "<b_170>": 32228,
+   "<b_171>": 32229,
+   "<b_172>": 32230,
+   "<b_173>": 32231,
+   "<b_174>": 32232,
+   "<b_175>": 32233,
+   "<b_176>": 32234,
+   "<b_177>": 32235,
+   "<b_178>": 32236,
+   "<b_179>": 32237,
+   "<b_17>": 32238,
+   "<b_180>": 32239,
+   "<b_181>": 32240,
+   "<b_182>": 32241,
+   "<b_183>": 32242,
+   "<b_184>": 32243,
+   "<b_185>": 32244,
+   "<b_186>": 32245,
+   "<b_187>": 32246,
+   "<b_188>": 32247,
+   "<b_189>": 32248,
+   "<b_18>": 32249,
+   "<b_190>": 32250,
+   "<b_191>": 32251,
+   "<b_192>": 32252,
+   "<b_193>": 32253,
+   "<b_194>": 32254,
+   "<b_195>": 32255,
+   "<b_196>": 32256,
+   "<b_197>": 32257,
+   "<b_198>": 32258,
+   "<b_199>": 32259,
+   "<b_19>": 32260,
+   "<b_1>": 32261,
+   "<b_200>": 32262,
+   "<b_201>": 32263,
+   "<b_202>": 32264,
+   "<b_203>": 32265,
+   "<b_204>": 32266,
+   "<b_205>": 32267,
+   "<b_206>": 32268,
+   "<b_207>": 32269,
+   "<b_208>": 32270,
+   "<b_209>": 32271,
+   "<b_20>": 32272,
+   "<b_210>": 32273,
+   "<b_211>": 32274,
+   "<b_212>": 32275,
+   "<b_213>": 32276,
+   "<b_214>": 32277,
+   "<b_215>": 32278,
+   "<b_216>": 32279,
+   "<b_217>": 32280,
+   "<b_218>": 32281,
+   "<b_219>": 32282,
+   "<b_21>": 32283,
+   "<b_220>": 32284,
+   "<b_221>": 32285,
+   "<b_222>": 32286,
+   "<b_223>": 32287,
+   "<b_224>": 32288,
+   "<b_225>": 32289,
+   "<b_226>": 32290,
+   "<b_227>": 32291,
+   "<b_228>": 32292,
+   "<b_229>": 32293,
+   "<b_22>": 32294,
+   "<b_230>": 32295,
+   "<b_231>": 32296,
+   "<b_232>": 32297,
+   "<b_233>": 32298,
+   "<b_234>": 32299,
+   "<b_235>": 32300,
+   "<b_236>": 32301,
+   "<b_237>": 32302,
+   "<b_238>": 32303,
+   "<b_239>": 32304,
+   "<b_23>": 32305,
+   "<b_240>": 32306,
+   "<b_241>": 32307,
+   "<b_242>": 32308,
+   "<b_243>": 32309,
+   "<b_244>": 32310,
+   "<b_245>": 32311,
+   "<b_246>": 32312,
+   "<b_247>": 32313,
+   "<b_248>": 32314,
+   "<b_249>": 32315,
+   "<b_24>": 32316,
+   "<b_250>": 32317,
+   "<b_251>": 32318,
+   "<b_252>": 32319,
+   "<b_253>": 32320,
+   "<b_254>": 32321,
+   "<b_255>": 32322,
+   "<b_25>": 32323,
+   "<b_26>": 32324,
+   "<b_27>": 32325,
+   "<b_28>": 32326,
+   "<b_29>": 32327,
+   "<b_2>": 32328,
+   "<b_30>": 32329,
+   "<b_31>": 32330,
+   "<b_32>": 32331,
+   "<b_33>": 32332,
+   "<b_34>": 32333,
+   "<b_35>": 32334,
+   "<b_36>": 32335,
+   "<b_37>": 32336,
+   "<b_38>": 32337,
+   "<b_39>": 32338,
+   "<b_3>": 32339,
+   "<b_40>": 32340,
+   "<b_41>": 32341,
+   "<b_42>": 32342,
+   "<b_43>": 32343,
+   "<b_44>": 32344,
+   "<b_45>": 32345,
+   "<b_46>": 32346,
+   "<b_47>": 32347,
+   "<b_48>": 32348,
+   "<b_49>": 32349,
+   "<b_4>": 32350,
+   "<b_50>": 32351,
+   "<b_51>": 32352,
+   "<b_52>": 32353,
+   "<b_53>": 32354,
+   "<b_54>": 32355,
+   "<b_55>": 32356,
+   "<b_56>": 32357,
+   "<b_57>": 32358,
+   "<b_58>": 32359,
+   "<b_59>": 32360,
+   "<b_5>": 32361,
+   "<b_60>": 32362,
+   "<b_61>": 32363,
+   "<b_62>": 32364,
+   "<b_63>": 32365,
+   "<b_64>": 32366,
+   "<b_65>": 32367,
+   "<b_66>": 32368,
+   "<b_67>": 32369,
+   "<b_68>": 32370,
+   "<b_69>": 32371,
+   "<b_6>": 32372,
+   "<b_70>": 32373,
+   "<b_71>": 32374,
+   "<b_72>": 32375,
+   "<b_73>": 32376,
+   "<b_74>": 32377,
+   "<b_75>": 32378,
+   "<b_76>": 32379,
+   "<b_77>": 32380,
+   "<b_78>": 32381,
+   "<b_79>": 32382,
+   "<b_7>": 32383,
+   "<b_80>": 32384,
+   "<b_81>": 32385,
+   "<b_82>": 32386,
+   "<b_83>": 32387,
+   "<b_84>": 32388,
+   "<b_85>": 32389,
+   "<b_86>": 32390,
+   "<b_87>": 32391,
+   "<b_88>": 32392,
+   "<b_89>": 32393,
+   "<b_8>": 32394,
+   "<b_90>": 32395,
+   "<b_91>": 32396,
+   "<b_92>": 32397,
+   "<b_93>": 32398,
+   "<b_94>": 32399,
+   "<b_95>": 32400,
+   "<b_96>": 32401,
+   "<b_97>": 32402,
+   "<b_98>": 32403,
+   "<b_99>": 32404,
+   "<b_9>": 32405,
+   "<c_0>": 32406,
+   "<c_100>": 32407,
+   "<c_101>": 32408,
+   "<c_102>": 32409,
+   "<c_103>": 32410,
+   "<c_104>": 32411,
+   "<c_105>": 32412,
+   "<c_106>": 32413,
+   "<c_107>": 32414,
+   "<c_108>": 32415,
+   "<c_109>": 32416,
+   "<c_10>": 32417,
+   "<c_110>": 32418,
+   "<c_111>": 32419,
+   "<c_112>": 32420,
+   "<c_113>": 32421,
+   "<c_114>": 32422,
+   "<c_115>": 32423,
+   "<c_116>": 32424,
+   "<c_117>": 32425,
+   "<c_118>": 32426,
+   "<c_119>": 32427,
+   "<c_11>": 32428,
+   "<c_120>": 32429,
+   "<c_121>": 32430,
+   "<c_122>": 32431,
+   "<c_123>": 32432,
+   "<c_124>": 32433,
+   "<c_125>": 32434,
+   "<c_126>": 32435,
+   "<c_127>": 32436,
+   "<c_128>": 32437,
+   "<c_129>": 32438,
+   "<c_12>": 32439,
+   "<c_130>": 32440,
+   "<c_131>": 32441,
+   "<c_132>": 32442,
+   "<c_133>": 32443,
+   "<c_134>": 32444,
+   "<c_135>": 32445,
+   "<c_136>": 32446,
+   "<c_137>": 32447,
+   "<c_138>": 32448,
+   "<c_139>": 32449,
+   "<c_13>": 32450,
+   "<c_140>": 32451,
+   "<c_141>": 32452,
+   "<c_142>": 32453,
+   "<c_143>": 32454,
+   "<c_144>": 32455,
+   "<c_145>": 32456,
+   "<c_146>": 32457,
+   "<c_147>": 32458,
+   "<c_148>": 32459,
+   "<c_149>": 32460,
+   "<c_14>": 32461,
+   "<c_150>": 32462,
+   "<c_151>": 32463,
+   "<c_152>": 32464,
+   "<c_153>": 32465,
+   "<c_154>": 32466,
+   "<c_155>": 32467,
+   "<c_156>": 32468,
+   "<c_157>": 32469,
+   "<c_158>": 32470,
+   "<c_159>": 32471,
+   "<c_15>": 32472,
+   "<c_160>": 32473,
+   "<c_161>": 32474,
+   "<c_162>": 32475,
+   "<c_163>": 32476,
+   "<c_164>": 32477,
+   "<c_165>": 32478,
+   "<c_166>": 32479,
+   "<c_167>": 32480,
+   "<c_168>": 32481,
+   "<c_169>": 32482,
+   "<c_16>": 32483,
+   "<c_170>": 32484,
+   "<c_171>": 32485,
+   "<c_172>": 32486,
+   "<c_173>": 32487,
+   "<c_174>": 32488,
+   "<c_175>": 32489,
+   "<c_176>": 32490,
+   "<c_177>": 32491,
+   "<c_178>": 32492,
+   "<c_179>": 32493,
+   "<c_17>": 32494,
+   "<c_180>": 32495,
+   "<c_181>": 32496,
+   "<c_182>": 32497,
+   "<c_183>": 32498,
+   "<c_184>": 32499,
+   "<c_185>": 32500,
+   "<c_186>": 32501,
+   "<c_187>": 32502,
+   "<c_188>": 32503,
+   "<c_189>": 32504,
+   "<c_18>": 32505,
+   "<c_190>": 32506,
+   "<c_191>": 32507,
+   "<c_192>": 32508,
+   "<c_193>": 32509,
+   "<c_194>": 32510,
+   "<c_195>": 32511,
+   "<c_196>": 32512,
+   "<c_197>": 32513,
+   "<c_198>": 32514,
+   "<c_199>": 32515,
+   "<c_19>": 32516,
+   "<c_1>": 32517,
+   "<c_200>": 32518,
+   "<c_201>": 32519,
+   "<c_202>": 32520,
+   "<c_203>": 32521,
+   "<c_204>": 32522,
+   "<c_205>": 32523,
+   "<c_206>": 32524,
+   "<c_207>": 32525,
+   "<c_208>": 32526,
+   "<c_209>": 32527,
+   "<c_20>": 32528,
+   "<c_210>": 32529,
+   "<c_211>": 32530,
+   "<c_212>": 32531,
+   "<c_213>": 32532,
+   "<c_214>": 32533,
+   "<c_215>": 32534,
+   "<c_216>": 32535,
+   "<c_217>": 32536,
+   "<c_218>": 32537,
+   "<c_219>": 32538,
+   "<c_21>": 32539,
+   "<c_220>": 32540,
+   "<c_221>": 32541,
+   "<c_222>": 32542,
+   "<c_223>": 32543,
+   "<c_224>": 32544,
+   "<c_225>": 32545,
+   "<c_226>": 32546,
+   "<c_227>": 32547,
+   "<c_228>": 32548,
+   "<c_229>": 32549,
+   "<c_22>": 32550,
+   "<c_230>": 32551,
+   "<c_231>": 32552,
+   "<c_232>": 32553,
+   "<c_233>": 32554,
+   "<c_234>": 32555,
+   "<c_235>": 32556,
+   "<c_236>": 32557,
+   "<c_237>": 32558,
+   "<c_238>": 32559,
+   "<c_239>": 32560,
+   "<c_23>": 32561,
+   "<c_240>": 32562,
+   "<c_241>": 32563,
+   "<c_242>": 32564,
+   "<c_243>": 32565,
+   "<c_244>": 32566,
+   "<c_245>": 32567,
+   "<c_246>": 32568,
+   "<c_247>": 32569,
+   "<c_248>": 32570,
+   "<c_249>": 32571,
+   "<c_24>": 32572,
+   "<c_250>": 32573,
+   "<c_251>": 32574,
+   "<c_252>": 32575,
+   "<c_253>": 32576,
+   "<c_254>": 32577,
+   "<c_255>": 32578,
+   "<c_25>": 32579,
+   "<c_26>": 32580,
+   "<c_27>": 32581,
+   "<c_28>": 32582,
+   "<c_29>": 32583,
+   "<c_2>": 32584,
+   "<c_30>": 32585,
+   "<c_31>": 32586,
+   "<c_32>": 32587,
+   "<c_33>": 32588,
+   "<c_34>": 32589,
+   "<c_35>": 32590,
+   "<c_36>": 32591,
+   "<c_37>": 32592,
+   "<c_38>": 32593,
+   "<c_39>": 32594,
+   "<c_3>": 32595,
+   "<c_40>": 32596,
+   "<c_41>": 32597,
+   "<c_42>": 32598,
+   "<c_43>": 32599,
+   "<c_44>": 32600,
+   "<c_45>": 32601,
+   "<c_46>": 32602,
+   "<c_47>": 32603,
+   "<c_48>": 32604,
+   "<c_49>": 32605,
+   "<c_4>": 32606,
+   "<c_50>": 32607,
+   "<c_51>": 32608,
+   "<c_52>": 32609,
+   "<c_53>": 32610,
+   "<c_54>": 32611,
+   "<c_55>": 32612,
+   "<c_56>": 32613,
+   "<c_57>": 32614,
+   "<c_58>": 32615,
+   "<c_59>": 32616,
+   "<c_5>": 32617,
+   "<c_60>": 32618,
+   "<c_61>": 32619,
+   "<c_62>": 32620,
+   "<c_63>": 32621,
+   "<c_64>": 32622,
+   "<c_65>": 32623,
+   "<c_66>": 32624,
+   "<c_67>": 32625,
+   "<c_68>": 32626,
+   "<c_69>": 32627,
+   "<c_6>": 32628,
+   "<c_70>": 32629,
+   "<c_71>": 32630,
+   "<c_72>": 32631,
+   "<c_73>": 32632,
+   "<c_74>": 32633,
+   "<c_75>": 32634,
+   "<c_76>": 32635,
+   "<c_77>": 32636,
+   "<c_78>": 32637,
+   "<c_79>": 32638,
+   "<c_7>": 32639,
+   "<c_80>": 32640,
+   "<c_81>": 32641,
+   "<c_82>": 32642,
+   "<c_83>": 32643,
+   "<c_84>": 32644,
+   "<c_85>": 32645,
+   "<c_86>": 32646,
+   "<c_87>": 32647,
+   "<c_88>": 32648,
+   "<c_89>": 32649,
+   "<c_8>": 32650,
+   "<c_90>": 32651,
+   "<c_91>": 32652,
+   "<c_92>": 32653,
+   "<c_93>": 32654,
+   "<c_94>": 32655,
+   "<c_95>": 32656,
+   "<c_96>": 32657,
+   "<c_97>": 32658,
+   "<c_98>": 32659,
+   "<c_99>": 32660,
+   "<c_9>": 32661,
+   "<d_0>": 32662,
+   "<d_100>": 32663,
+   "<d_101>": 32664,
+   "<d_102>": 32665,
+   "<d_103>": 32666,
+   "<d_104>": 32667,
+   "<d_105>": 32668,
+   "<d_106>": 32669,
+   "<d_107>": 32670,
+   "<d_108>": 32671,
+   "<d_109>": 32672,
+   "<d_10>": 32673,
+   "<d_110>": 32674,
+   "<d_111>": 32675,
+   "<d_112>": 32676,
+   "<d_113>": 32677,
+   "<d_114>": 32678,
+   "<d_115>": 32679,
+   "<d_116>": 32680,
+   "<d_117>": 32681,
+   "<d_118>": 32682,
+   "<d_119>": 32683,
+   "<d_11>": 32684,
+   "<d_120>": 32685,
+   "<d_121>": 32686,
+   "<d_122>": 32687,
+   "<d_123>": 32688,
+   "<d_124>": 32689,
+   "<d_125>": 32690,
+   "<d_126>": 32691,
+   "<d_127>": 32692,
+   "<d_128>": 32693,
+   "<d_129>": 32694,
+   "<d_12>": 32695,
+   "<d_130>": 32696,
+   "<d_131>": 32697,
+   "<d_132>": 32698,
+   "<d_133>": 32699,
+   "<d_134>": 32700,
+   "<d_135>": 32701,
+   "<d_136>": 32702,
+   "<d_137>": 32703,
+   "<d_138>": 32704,
+   "<d_139>": 32705,
+   "<d_13>": 32706,
+   "<d_140>": 32707,
+   "<d_141>": 32708,
+   "<d_142>": 32709,
+   "<d_143>": 32710,
+   "<d_144>": 32711,
+   "<d_145>": 32712,
+   "<d_146>": 32713,
+   "<d_147>": 32714,
+   "<d_148>": 32715,
+   "<d_149>": 32716,
+   "<d_14>": 32717,
+   "<d_150>": 32718,
+   "<d_151>": 32719,
+   "<d_152>": 32720,
+   "<d_153>": 32721,
+   "<d_154>": 32722,
+   "<d_155>": 32723,
+   "<d_156>": 32724,
+   "<d_157>": 32725,
+   "<d_158>": 32726,
+   "<d_159>": 32727,
+   "<d_15>": 32728,
+   "<d_160>": 32729,
+   "<d_161>": 32730,
+   "<d_162>": 32731,
+   "<d_163>": 32732,
+   "<d_164>": 32733,
+   "<d_165>": 32734,
+   "<d_166>": 32735,
+   "<d_167>": 32736,
+   "<d_168>": 32737,
+   "<d_169>": 32738,
+   "<d_16>": 32739,
+   "<d_170>": 32740,
+   "<d_171>": 32741,
+   "<d_172>": 32742,
+   "<d_173>": 32743,
+   "<d_174>": 32744,
+   "<d_175>": 32745,
+   "<d_176>": 32746,
+   "<d_177>": 32747,
+   "<d_178>": 32748,
+   "<d_179>": 32749,
+   "<d_17>": 32750,
+   "<d_180>": 32751,
+   "<d_181>": 32752,
+   "<d_182>": 32753,
+   "<d_183>": 32754,
+   "<d_184>": 32755,
+   "<d_185>": 32756,
+   "<d_186>": 32757,
+   "<d_187>": 32758,
+   "<d_188>": 32759,
+   "<d_189>": 32760,
+   "<d_18>": 32761,
+   "<d_190>": 32762,
+   "<d_191>": 32763,
+   "<d_192>": 32764,
+   "<d_193>": 32765,
+   "<d_194>": 32766,
+   "<d_195>": 32767,
+   "<d_196>": 32768,
+   "<d_197>": 32769,
+   "<d_198>": 32770,
+   "<d_199>": 32771,
+   "<d_19>": 32772,
+   "<d_1>": 32773,
+   "<d_200>": 32774,
+   "<d_201>": 32775,
+   "<d_202>": 32776,
+   "<d_203>": 32777,
+   "<d_204>": 32778,
+   "<d_205>": 32779,
+   "<d_206>": 32780,
+   "<d_207>": 32781,
+   "<d_208>": 32782,
+   "<d_209>": 32783,
+   "<d_20>": 32784,
+   "<d_210>": 32785,
+   "<d_211>": 32786,
+   "<d_212>": 32787,
+   "<d_213>": 32788,
+   "<d_214>": 32789,
+   "<d_215>": 32790,
+   "<d_216>": 32791,
+   "<d_217>": 32792,
+   "<d_218>": 32793,
+   "<d_219>": 32794,
+   "<d_21>": 32795,
+   "<d_220>": 32796,
+   "<d_221>": 32797,
+   "<d_222>": 32798,
+   "<d_223>": 32799,
+   "<d_224>": 32800,
+   "<d_225>": 32801,
+   "<d_226>": 32802,
+   "<d_227>": 32803,
+   "<d_228>": 32804,
+   "<d_229>": 32805,
+   "<d_22>": 32806,
+   "<d_230>": 32807,
+   "<d_231>": 32808,
+   "<d_232>": 32809,
+   "<d_233>": 32810,
+   "<d_234>": 32811,
+   "<d_235>": 32812,
+   "<d_236>": 32813,
+   "<d_237>": 32814,
+   "<d_238>": 32815,
+   "<d_239>": 32816,
+   "<d_23>": 32817,
+   "<d_240>": 32818,
+   "<d_241>": 32819,
+   "<d_242>": 32820,
+   "<d_243>": 32821,
+   "<d_244>": 32822,
+   "<d_245>": 32823,
+   "<d_246>": 32824,
+   "<d_247>": 32825,
+   "<d_248>": 32826,
+   "<d_249>": 32827,
+   "<d_24>": 32828,
+   "<d_250>": 32829,
+   "<d_251>": 32830,
+   "<d_252>": 32831,
+   "<d_253>": 32832,
+   "<d_254>": 32833,
+   "<d_255>": 32834,
+   "<d_25>": 32835,
+   "<d_26>": 32836,
+   "<d_27>": 32837,
+   "<d_28>": 32838,
+   "<d_29>": 32839,
+   "<d_2>": 32840,
+   "<d_30>": 32841,
+   "<d_31>": 32842,
+   "<d_32>": 32843,
+   "<d_33>": 32844,
+   "<d_34>": 32845,
+   "<d_35>": 32846,
+   "<d_36>": 32847,
+   "<d_37>": 32848,
+   "<d_38>": 32849,
+   "<d_39>": 32850,
+   "<d_3>": 32851,
+   "<d_40>": 32852,
+   "<d_41>": 32853,
+   "<d_42>": 32854,
+   "<d_43>": 32855,
+   "<d_44>": 32856,
+   "<d_45>": 32857,
+   "<d_46>": 32858,
+   "<d_47>": 32859,
+   "<d_48>": 32860,
+   "<d_49>": 32861,
+   "<d_4>": 32862,
+   "<d_50>": 32863,
+   "<d_51>": 32864,
+   "<d_52>": 32865,
+   "<d_53>": 32866,
+   "<d_54>": 32867,
+   "<d_55>": 32868,
+   "<d_56>": 32869,
+   "<d_57>": 32870,
+   "<d_58>": 32871,
+   "<d_59>": 32872,
+   "<d_5>": 32873,
+   "<d_60>": 32874,
+   "<d_61>": 32875,
+   "<d_62>": 32876,
+   "<d_63>": 32877,
+   "<d_64>": 32878,
+   "<d_65>": 32879,
+   "<d_66>": 32880,
+   "<d_67>": 32881,
+   "<d_68>": 32882,
+   "<d_69>": 32883,
+   "<d_6>": 32884,
+   "<d_70>": 32885,
+   "<d_71>": 32886,
+   "<d_72>": 32887,
+   "<d_73>": 32888,
+   "<d_74>": 32889,
+   "<d_75>": 32890,
+   "<d_76>": 32891,
+   "<d_77>": 32892,
+   "<d_78>": 32893,
+   "<d_79>": 32894,
+   "<d_7>": 32895,
+   "<d_80>": 32896,
+   "<d_81>": 32897,
+   "<d_82>": 32898,
+   "<d_83>": 32899,
+   "<d_84>": 32900,
+   "<d_85>": 32901,
+   "<d_86>": 32902,
+   "<d_87>": 32903,
+   "<d_88>": 32904,
+   "<d_89>": 32905,
+   "<d_8>": 32906,
+   "<d_90>": 32907,
+   "<d_91>": 32908,
+   "<d_92>": 32909,
+   "<d_93>": 32910,
+   "<d_94>": 32911,
+   "<d_95>": 32912,
+   "<d_96>": 32913,
+   "<d_97>": 32914,
+   "<d_98>": 32915,
+   "<d_99>": 32916,
+   "<d_9>": 32917
+ }
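added_tokens.json appends 918 tokens of the form `<a_*>`, `<b_*>`, `<c_*>`, and `<d_*>` at ids 32000-32917, which is exactly what lifts the vocabulary from Llama 2's 32000 to the `vocab_size` of 32918 recorded in config.json below. The `<b_*>`, `<c_*>`, and `<d_*>` series each cover 0-255, while only 150 `<a_*>` codes appear, presumably those that actually occur in the training data. A sketch of how such an extension is typically produced (illustrative: this version adds all 256 codes per series, not the exact subset above):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

# Four series of 256 discrete code tokens used as extra vocabulary.
new_tokens = [f"<{prefix}_{i}>" for prefix in "abcd" for i in range(256)]
tokenizer.add_tokens(new_tokens)  # appended after the base vocab (last id 31999)

# Grow the embedding matrix so the new ids (32000+) have rows to train.
model.resize_token_embeddings(len(tokenizer))
```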
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_name_or_path": "meta-llama/Llama-2-7b-hf",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 4096,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.45.2",
+   "use_cache": true,
+   "vocab_size": 32918
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<unk>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
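Note that `pad_token` is mapped to `<unk>`: Llama 2 ships without a dedicated padding token, so reusing an existing special token is a common way to enable batched training without growing the vocabulary further. A one-line sketch of that setup:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
# Reuse <unk> for padding, matching special_tokens_map.json above.
tokenizer.pad_token = tokenizer.unk_token
```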
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,98 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 4.0,
+   "eval_steps": 500,
+   "global_step": 2060,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.4854368932038835,
+       "grad_norm": 1.139075756072998,
+       "learning_rate": 8.794946550048592e-05,
+       "loss": 3.1617,
+       "step": 250
+     },
+     {
+       "epoch": 0.970873786407767,
+       "grad_norm": 0.9628340005874634,
+       "learning_rate": 7.580174927113704e-05,
+       "loss": 2.3986,
+       "step": 500
+     },
+     {
+       "epoch": 1.4563106796116505,
+       "grad_norm": 0.7678180932998657,
+       "learning_rate": 6.365403304178815e-05,
+       "loss": 2.3268,
+       "step": 750
+     },
+     {
+       "epoch": 1.941747572815534,
+       "grad_norm": 0.735146701335907,
+       "learning_rate": 5.150631681243926e-05,
+       "loss": 2.3146,
+       "step": 1000
+     },
+     {
+       "epoch": 2.4271844660194173,
+       "grad_norm": 0.6907908320426941,
+       "learning_rate": 3.9358600583090386e-05,
+       "loss": 2.2942,
+       "step": 1250
+     },
+     {
+       "epoch": 2.912621359223301,
+       "grad_norm": 0.7977621555328369,
+       "learning_rate": 2.72108843537415e-05,
+       "loss": 2.286,
+       "step": 1500
+     },
+     {
+       "epoch": 3.3980582524271843,
+       "grad_norm": 0.7419993281364441,
+       "learning_rate": 1.5063168124392615e-05,
+       "loss": 2.2667,
+       "step": 1750
+     },
+     {
+       "epoch": 3.883495145631068,
+       "grad_norm": 0.7748873233795166,
+       "learning_rate": 2.915451895043732e-06,
+       "loss": 2.2535,
+       "step": 2000
+     },
+     {
+       "epoch": 4.0,
+       "step": 2060,
+       "total_flos": 3.616227843169714e+18,
+       "train_loss": 2.4079057119425062,
+       "train_runtime": 29920.0718,
+       "train_samples_per_second": 17.625,
+       "train_steps_per_second": 0.069
+     }
+   ],
+   "logging_steps": 250,
+   "max_steps": 2060,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": false,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 3.616227843169714e+18,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
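The trainer state is internally consistent with the hyperparameters in the model card: a per-device batch of 16 with 16 gradient-accumulation steps gives the effective batch of 256, and 2060 optimizer steps over 4 epochs means 515 steps per epoch, i.e. roughly 515 × 256 ≈ 131,840 samples per epoch. A quick arithmetic check (plain Python, no training involved):

```python
# Values copied from trainer_state.json / README.md above.
train_batch_size = 16
grad_accum_steps = 16
global_step, num_epochs = 2060, 4

effective_batch = train_batch_size * grad_accum_steps  # 256
steps_per_epoch = global_step // num_epochs            # 515
samples_per_epoch = steps_per_epoch * effective_batch  # 131,840

# Cross-check against logged throughput: runtime * samples/s over 4 epochs.
samples_from_throughput = 29920.0718 * 17.625 / num_epochs  # ~131,835
print(effective_batch, steps_per_epoch, samples_per_epoch,
      round(samples_from_throughput))
```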
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7730c2aeda0ef80b88155d30afa262f9a7e3cb30b892963a7a33bb8c63f05086
+ size 5240