IliyanGochev
commited on
Commit
•
0475165
1
Parent(s):
86c7ad8
Training in progress epoch 4
Browse files- README.md +13 -0
- Whisper PEFT Fine-Tuning/events.out.tfevents.1696436338.MLbox.300106.0 +2 -2
- adapter_model.bin +1 -1
- best_checkpoint/README.md +65 -0
- best_checkpoint/adapter_config.json +131 -131
- best_checkpoint/adapter_model.bin +2 -2
- best_checkpoint/optimizer.bin +1 -1
- best_checkpoint/random_states_0.pkl +1 -1
- best_checkpoint/random_states_1.pkl +1 -1
- best_checkpoint/scheduler.bin +1 -1
README.md
CHANGED
@@ -66,6 +66,18 @@ The following `bitsandbytes` quantization config was used during training:
|
|
66 |
- bnb_4bit_use_double_quant: False
|
67 |
- bnb_4bit_compute_dtype: float32
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
The following `bitsandbytes` quantization config was used during training:
|
70 |
- quant_method: bitsandbytes
|
71 |
- load_in_8bit: True
|
@@ -97,6 +109,7 @@ The following `bitsandbytes` quantization config was used during training:
|
|
97 |
- PEFT 0.5.0
|
98 |
- PEFT 0.5.0
|
99 |
- PEFT 0.5.0
|
|
|
100 |
|
101 |
- PEFT 0.5.0.dev0
|
102 |
`bitsandbytes` quantization config was used during training:
|
|
|
66 |
- bnb_4bit_use_double_quant: False
|
67 |
- bnb_4bit_compute_dtype: float32
|
68 |
|
69 |
+
The following `bitsandbytes` quantization config was used during training:
|
70 |
+
- quant_method: bitsandbytes
|
71 |
+
- load_in_8bit: True
|
72 |
+
- load_in_4bit: False
|
73 |
+
- llm_int8_threshold: 6.0
|
74 |
+
- llm_int8_skip_modules: None
|
75 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
76 |
+
- llm_int8_has_fp16_weight: False
|
77 |
+
- bnb_4bit_quant_type: fp4
|
78 |
+
- bnb_4bit_use_double_quant: False
|
79 |
+
- bnb_4bit_compute_dtype: float32
|
80 |
+
|
81 |
The following `bitsandbytes` quantization config was used during training:
|
82 |
- quant_method: bitsandbytes
|
83 |
- load_in_8bit: True
|
|
|
109 |
- PEFT 0.5.0
|
110 |
- PEFT 0.5.0
|
111 |
- PEFT 0.5.0
|
112 |
+
- PEFT 0.5.0
|
113 |
|
114 |
- PEFT 0.5.0.dev0
|
115 |
`bitsandbytes` quantization config was used during training:
|
Whisper PEFT Fine-Tuning/events.out.tfevents.1696436338.MLbox.300106.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2dddb1233638c384a06f7e03049f7f2ee00a975c82118ba38a499608ecb2599
|
3 |
+
size 5227
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 38697637
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:945fb9287c8252170548a62c8b31f0e2bac3a2123e2c7771498ff4b0ed86667f
|
3 |
size 38697637
|
best_checkpoint/README.md
CHANGED
@@ -352,6 +352,66 @@ The following `bitsandbytes` quantization config was used during training:
|
|
352 |
- bnb_4bit_use_double_quant: False
|
353 |
- bnb_4bit_compute_dtype: float32
|
354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
The following `bitsandbytes` quantization config was used during training:
|
356 |
- quant_method: bitsandbytes
|
357 |
- load_in_8bit: True
|
@@ -394,5 +454,10 @@ The following `bitsandbytes` quantization config was used during training:
|
|
394 |
- PEFT 0.5.0
|
395 |
- PEFT 0.5.0
|
396 |
- PEFT 0.5.0
|
|
|
|
|
|
|
|
|
|
|
397 |
|
398 |
- PEFT 0.5.0
|
|
|
352 |
- bnb_4bit_use_double_quant: False
|
353 |
- bnb_4bit_compute_dtype: float32
|
354 |
|
355 |
+
The following `bitsandbytes` quantization config was used during training:
|
356 |
+
- quant_method: bitsandbytes
|
357 |
+
- load_in_8bit: True
|
358 |
+
- load_in_4bit: False
|
359 |
+
- llm_int8_threshold: 6.0
|
360 |
+
- llm_int8_skip_modules: None
|
361 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
362 |
+
- llm_int8_has_fp16_weight: False
|
363 |
+
- bnb_4bit_quant_type: fp4
|
364 |
+
- bnb_4bit_use_double_quant: False
|
365 |
+
- bnb_4bit_compute_dtype: float32
|
366 |
+
|
367 |
+
The following `bitsandbytes` quantization config was used during training:
|
368 |
+
- quant_method: bitsandbytes
|
369 |
+
- load_in_8bit: True
|
370 |
+
- load_in_4bit: False
|
371 |
+
- llm_int8_threshold: 6.0
|
372 |
+
- llm_int8_skip_modules: None
|
373 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
374 |
+
- llm_int8_has_fp16_weight: False
|
375 |
+
- bnb_4bit_quant_type: fp4
|
376 |
+
- bnb_4bit_use_double_quant: False
|
377 |
+
- bnb_4bit_compute_dtype: float32
|
378 |
+
|
379 |
+
The following `bitsandbytes` quantization config was used during training:
|
380 |
+
- quant_method: bitsandbytes
|
381 |
+
- load_in_8bit: True
|
382 |
+
- load_in_4bit: False
|
383 |
+
- llm_int8_threshold: 6.0
|
384 |
+
- llm_int8_skip_modules: None
|
385 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
386 |
+
- llm_int8_has_fp16_weight: False
|
387 |
+
- bnb_4bit_quant_type: fp4
|
388 |
+
- bnb_4bit_use_double_quant: False
|
389 |
+
- bnb_4bit_compute_dtype: float32
|
390 |
+
|
391 |
+
The following `bitsandbytes` quantization config was used during training:
|
392 |
+
- quant_method: bitsandbytes
|
393 |
+
- load_in_8bit: True
|
394 |
+
- load_in_4bit: False
|
395 |
+
- llm_int8_threshold: 6.0
|
396 |
+
- llm_int8_skip_modules: None
|
397 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
398 |
+
- llm_int8_has_fp16_weight: False
|
399 |
+
- bnb_4bit_quant_type: fp4
|
400 |
+
- bnb_4bit_use_double_quant: False
|
401 |
+
- bnb_4bit_compute_dtype: float32
|
402 |
+
|
403 |
+
The following `bitsandbytes` quantization config was used during training:
|
404 |
+
- quant_method: bitsandbytes
|
405 |
+
- load_in_8bit: True
|
406 |
+
- load_in_4bit: False
|
407 |
+
- llm_int8_threshold: 6.0
|
408 |
+
- llm_int8_skip_modules: None
|
409 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
410 |
+
- llm_int8_has_fp16_weight: False
|
411 |
+
- bnb_4bit_quant_type: fp4
|
412 |
+
- bnb_4bit_use_double_quant: False
|
413 |
+
- bnb_4bit_compute_dtype: float32
|
414 |
+
|
415 |
The following `bitsandbytes` quantization config was used during training:
|
416 |
- quant_method: bitsandbytes
|
417 |
- load_in_8bit: True
|
|
|
454 |
- PEFT 0.5.0
|
455 |
- PEFT 0.5.0
|
456 |
- PEFT 0.5.0
|
457 |
+
- PEFT 0.5.0
|
458 |
+
- PEFT 0.5.0
|
459 |
+
- PEFT 0.5.0
|
460 |
+
- PEFT 0.5.0
|
461 |
+
- PEFT 0.5.0
|
462 |
|
463 |
- PEFT 0.5.0
|
best_checkpoint/adapter_config.json
CHANGED
@@ -73,7 +73,7 @@
|
|
73 |
false,
|
74 |
false,
|
75 |
false,
|
76 |
-
|
77 |
true,
|
78 |
true
|
79 |
],
|
@@ -102,7 +102,7 @@
|
|
102 |
true,
|
103 |
true,
|
104 |
true,
|
105 |
-
|
106 |
true
|
107 |
],
|
108 |
"model.decoder.layers.0.self_attn.k_proj.lora_E": [
|
@@ -215,7 +215,7 @@
|
|
215 |
false,
|
216 |
false,
|
217 |
false,
|
218 |
-
|
219 |
],
|
220 |
"model.decoder.layers.1.fc1.lora_E": [
|
221 |
true,
|
@@ -411,7 +411,7 @@
|
|
411 |
true,
|
412 |
false,
|
413 |
false,
|
414 |
-
|
415 |
],
|
416 |
"model.decoder.layers.10.self_attn.q_proj.lora_E": [
|
417 |
false,
|
@@ -504,7 +504,7 @@
|
|
504 |
true,
|
505 |
true,
|
506 |
true,
|
507 |
-
|
508 |
true,
|
509 |
true,
|
510 |
true,
|
@@ -550,7 +550,7 @@
|
|
550 |
false,
|
551 |
false,
|
552 |
false,
|
553 |
-
|
554 |
true
|
555 |
],
|
556 |
"model.decoder.layers.11.self_attn.q_proj.lora_E": [
|
@@ -574,7 +574,7 @@
|
|
574 |
false,
|
575 |
false,
|
576 |
false,
|
577 |
-
|
578 |
false,
|
579 |
false,
|
580 |
false,
|
@@ -641,7 +641,7 @@
|
|
641 |
true,
|
642 |
true,
|
643 |
true,
|
644 |
-
|
645 |
true,
|
646 |
true,
|
647 |
true,
|
@@ -666,7 +666,7 @@
|
|
666 |
true
|
667 |
],
|
668 |
"model.decoder.layers.12.self_attn.k_proj.lora_E": [
|
669 |
-
|
670 |
false,
|
671 |
false,
|
672 |
false,
|
@@ -683,14 +683,14 @@
|
|
683 |
false,
|
684 |
false,
|
685 |
false,
|
686 |
-
|
687 |
false,
|
688 |
false,
|
689 |
false,
|
690 |
false,
|
691 |
true,
|
692 |
false,
|
693 |
-
|
694 |
false
|
695 |
],
|
696 |
"model.decoder.layers.12.self_attn.q_proj.lora_E": [
|
@@ -747,7 +747,7 @@
|
|
747 |
false,
|
748 |
false,
|
749 |
false,
|
750 |
-
|
751 |
],
|
752 |
"model.decoder.layers.13.encoder_attn.q_proj.lora_E": [
|
753 |
false,
|
@@ -770,8 +770,8 @@
|
|
770 |
false,
|
771 |
true,
|
772 |
true,
|
773 |
-
true,
|
774 |
false,
|
|
|
775 |
false,
|
776 |
true,
|
777 |
true,
|
@@ -823,9 +823,9 @@
|
|
823 |
false,
|
824 |
false,
|
825 |
true,
|
826 |
-
false,
|
827 |
true,
|
828 |
false,
|
|
|
829 |
true,
|
830 |
false,
|
831 |
true,
|
@@ -962,7 +962,7 @@
|
|
962 |
"model.decoder.layers.14.self_attn.out_proj.lora_E": [
|
963 |
false,
|
964 |
true,
|
965 |
-
|
966 |
true,
|
967 |
false,
|
968 |
false,
|
@@ -1108,7 +1108,7 @@
|
|
1108 |
true,
|
1109 |
true,
|
1110 |
false,
|
1111 |
-
|
1112 |
false,
|
1113 |
false,
|
1114 |
true
|
@@ -1135,8 +1135,8 @@
|
|
1135 |
true,
|
1136 |
false,
|
1137 |
false,
|
1138 |
-
|
1139 |
-
|
1140 |
true,
|
1141 |
true,
|
1142 |
false
|
@@ -1157,12 +1157,12 @@
|
|
1157 |
],
|
1158 |
"model.decoder.layers.16.encoder_attn.out_proj.lora_E": [
|
1159 |
false,
|
1160 |
-
|
1161 |
false,
|
1162 |
false,
|
1163 |
false,
|
1164 |
true,
|
1165 |
-
|
1166 |
false,
|
1167 |
false,
|
1168 |
false,
|
@@ -1181,7 +1181,7 @@
|
|
1181 |
false,
|
1182 |
false,
|
1183 |
false,
|
1184 |
-
|
1185 |
],
|
1186 |
"model.decoder.layers.16.encoder_attn.v_proj.lora_E": [
|
1187 |
true,
|
@@ -1221,7 +1221,7 @@
|
|
1221 |
true,
|
1222 |
true,
|
1223 |
true,
|
1224 |
-
|
1225 |
true,
|
1226 |
true
|
1227 |
],
|
@@ -1272,7 +1272,7 @@
|
|
1272 |
true,
|
1273 |
true,
|
1274 |
true,
|
1275 |
-
|
1276 |
true,
|
1277 |
true,
|
1278 |
true,
|
@@ -1296,10 +1296,9 @@
|
|
1296 |
false
|
1297 |
],
|
1298 |
"model.decoder.layers.17.encoder_attn.out_proj.lora_E": [
|
1299 |
-
false,
|
1300 |
-
false,
|
1301 |
true,
|
1302 |
false,
|
|
|
1303 |
false,
|
1304 |
false,
|
1305 |
false,
|
@@ -1307,6 +1306,7 @@
|
|
1307 |
false,
|
1308 |
false,
|
1309 |
false,
|
|
|
1310 |
false
|
1311 |
],
|
1312 |
"model.decoder.layers.17.encoder_attn.q_proj.lora_E": [
|
@@ -1329,7 +1329,7 @@
|
|
1329 |
true,
|
1330 |
true,
|
1331 |
true,
|
1332 |
-
|
1333 |
true,
|
1334 |
true,
|
1335 |
true,
|
@@ -1375,7 +1375,7 @@
|
|
1375 |
false,
|
1376 |
false,
|
1377 |
false,
|
1378 |
-
|
1379 |
false,
|
1380 |
false
|
1381 |
],
|
@@ -1461,7 +1461,7 @@
|
|
1461 |
false,
|
1462 |
false,
|
1463 |
false,
|
1464 |
-
|
1465 |
],
|
1466 |
"model.decoder.layers.18.encoder_attn.v_proj.lora_E": [
|
1467 |
true,
|
@@ -1471,7 +1471,7 @@
|
|
1471 |
true,
|
1472 |
true,
|
1473 |
true,
|
1474 |
-
|
1475 |
true,
|
1476 |
true,
|
1477 |
true,
|
@@ -1503,7 +1503,7 @@
|
|
1503 |
true,
|
1504 |
true,
|
1505 |
false,
|
1506 |
-
|
1507 |
],
|
1508 |
"model.decoder.layers.18.self_attn.k_proj.lora_E": [
|
1509 |
false,
|
@@ -1521,7 +1521,7 @@
|
|
1521 |
],
|
1522 |
"model.decoder.layers.18.self_attn.out_proj.lora_E": [
|
1523 |
true,
|
1524 |
-
|
1525 |
true,
|
1526 |
false,
|
1527 |
true,
|
@@ -1552,7 +1552,7 @@
|
|
1552 |
false,
|
1553 |
true,
|
1554 |
false,
|
1555 |
-
|
1556 |
true,
|
1557 |
false,
|
1558 |
false,
|
@@ -1610,7 +1610,7 @@
|
|
1610 |
false,
|
1611 |
false,
|
1612 |
false,
|
1613 |
-
|
1614 |
false,
|
1615 |
false,
|
1616 |
false,
|
@@ -1656,20 +1656,20 @@
|
|
1656 |
false,
|
1657 |
false,
|
1658 |
false,
|
1659 |
-
|
1660 |
false
|
1661 |
],
|
1662 |
"model.decoder.layers.19.self_attn.out_proj.lora_E": [
|
1663 |
true,
|
1664 |
false,
|
1665 |
false,
|
1666 |
-
true,
|
1667 |
false,
|
1668 |
-
true,
|
1669 |
false,
|
|
|
1670 |
false,
|
1671 |
false,
|
1672 |
true,
|
|
|
1673 |
false,
|
1674 |
false
|
1675 |
],
|
@@ -1726,7 +1726,7 @@
|
|
1726 |
false,
|
1727 |
false,
|
1728 |
false,
|
1729 |
-
|
1730 |
false
|
1731 |
],
|
1732 |
"model.decoder.layers.2.encoder_attn.q_proj.lora_E": [
|
@@ -1810,7 +1810,7 @@
|
|
1810 |
false,
|
1811 |
true,
|
1812 |
true,
|
1813 |
-
|
1814 |
false
|
1815 |
],
|
1816 |
"model.decoder.layers.2.self_attn.q_proj.lora_E": [
|
@@ -1861,9 +1861,9 @@
|
|
1861 |
false,
|
1862 |
true,
|
1863 |
true,
|
1864 |
-
false,
|
1865 |
true,
|
1866 |
-
|
|
|
1867 |
true,
|
1868 |
true,
|
1869 |
true,
|
@@ -1922,11 +1922,11 @@
|
|
1922 |
true,
|
1923 |
true,
|
1924 |
true,
|
1925 |
-
|
1926 |
true
|
1927 |
],
|
1928 |
"model.decoder.layers.20.self_attn.k_proj.lora_E": [
|
1929 |
-
|
1930 |
true,
|
1931 |
true,
|
1932 |
true,
|
@@ -2136,7 +2136,7 @@
|
|
2136 |
false
|
2137 |
],
|
2138 |
"model.decoder.layers.22.encoder_attn.out_proj.lora_E": [
|
2139 |
-
|
2140 |
true,
|
2141 |
false,
|
2142 |
true,
|
@@ -2221,13 +2221,13 @@
|
|
2221 |
],
|
2222 |
"model.decoder.layers.22.self_attn.out_proj.lora_E": [
|
2223 |
false,
|
2224 |
-
true,
|
2225 |
false,
|
2226 |
-
true,
|
2227 |
false,
|
2228 |
true,
|
2229 |
false,
|
|
|
2230 |
true,
|
|
|
2231 |
true,
|
2232 |
false,
|
2233 |
true,
|
@@ -2258,8 +2258,8 @@
|
|
2258 |
true,
|
2259 |
true,
|
2260 |
false,
|
2261 |
-
|
2262 |
-
|
2263 |
],
|
2264 |
"model.decoder.layers.23.encoder_attn.k_proj.lora_E": [
|
2265 |
false,
|
@@ -2349,13 +2349,13 @@
|
|
2349 |
false,
|
2350 |
false,
|
2351 |
false,
|
2352 |
-
|
2353 |
true,
|
2354 |
false,
|
2355 |
false,
|
2356 |
true,
|
2357 |
false,
|
2358 |
-
|
2359 |
false,
|
2360 |
true
|
2361 |
],
|
@@ -2364,7 +2364,7 @@
|
|
2364 |
true,
|
2365 |
true,
|
2366 |
true,
|
2367 |
-
|
2368 |
false,
|
2369 |
false,
|
2370 |
true,
|
@@ -2395,10 +2395,10 @@
|
|
2395 |
true,
|
2396 |
false,
|
2397 |
true,
|
2398 |
-
|
2399 |
-
true,
|
2400 |
true,
|
2401 |
false,
|
|
|
2402 |
true
|
2403 |
],
|
2404 |
"model.decoder.layers.24.encoder_attn.k_proj.lora_E": [
|
@@ -2483,7 +2483,7 @@
|
|
2483 |
true,
|
2484 |
false,
|
2485 |
false,
|
2486 |
-
|
2487 |
],
|
2488 |
"model.decoder.layers.24.self_attn.k_proj.lora_E": [
|
2489 |
true,
|
@@ -2492,12 +2492,12 @@
|
|
2492 |
true,
|
2493 |
true,
|
2494 |
true,
|
2495 |
-
false,
|
2496 |
true,
|
2497 |
true,
|
2498 |
true,
|
2499 |
true,
|
2500 |
-
|
|
|
2501 |
],
|
2502 |
"model.decoder.layers.24.self_attn.out_proj.lora_E": [
|
2503 |
true,
|
@@ -2558,12 +2558,12 @@
|
|
2558 |
"model.decoder.layers.25.encoder_attn.out_proj.lora_E": [
|
2559 |
true,
|
2560 |
true,
|
2561 |
-
|
2562 |
true,
|
2563 |
false,
|
2564 |
false,
|
2565 |
true,
|
2566 |
-
|
2567 |
true,
|
2568 |
true,
|
2569 |
true,
|
@@ -2621,20 +2621,20 @@
|
|
2621 |
true,
|
2622 |
true,
|
2623 |
true,
|
2624 |
-
|
2625 |
true,
|
2626 |
true
|
2627 |
],
|
2628 |
"model.decoder.layers.25.self_attn.k_proj.lora_E": [
|
2629 |
true,
|
2630 |
-
|
2631 |
true,
|
2632 |
true,
|
2633 |
false,
|
2634 |
true,
|
2635 |
true,
|
2636 |
-
|
2637 |
-
|
2638 |
true,
|
2639 |
true,
|
2640 |
false
|
@@ -2706,7 +2706,7 @@
|
|
2706 |
true,
|
2707 |
true,
|
2708 |
true,
|
2709 |
-
|
2710 |
true
|
2711 |
],
|
2712 |
"model.decoder.layers.26.encoder_attn.q_proj.lora_E": [
|
@@ -2766,7 +2766,7 @@
|
|
2766 |
false
|
2767 |
],
|
2768 |
"model.decoder.layers.26.self_attn.k_proj.lora_E": [
|
2769 |
-
|
2770 |
true,
|
2771 |
false,
|
2772 |
true,
|
@@ -2782,7 +2782,7 @@
|
|
2782 |
"model.decoder.layers.26.self_attn.out_proj.lora_E": [
|
2783 |
false,
|
2784 |
false,
|
2785 |
-
|
2786 |
true,
|
2787 |
false,
|
2788 |
false,
|
@@ -2796,7 +2796,7 @@
|
|
2796 |
"model.decoder.layers.26.self_attn.q_proj.lora_E": [
|
2797 |
false,
|
2798 |
false,
|
2799 |
-
|
2800 |
false,
|
2801 |
false,
|
2802 |
false,
|
@@ -2811,11 +2811,11 @@
|
|
2811 |
false,
|
2812 |
true,
|
2813 |
false,
|
2814 |
-
|
2815 |
true,
|
2816 |
false,
|
2817 |
false,
|
2818 |
-
|
2819 |
true,
|
2820 |
true,
|
2821 |
false,
|
@@ -2921,7 +2921,7 @@
|
|
2921 |
],
|
2922 |
"model.decoder.layers.27.self_attn.out_proj.lora_E": [
|
2923 |
false,
|
2924 |
-
|
2925 |
true,
|
2926 |
false,
|
2927 |
false,
|
@@ -2948,7 +2948,7 @@
|
|
2948 |
false
|
2949 |
],
|
2950 |
"model.decoder.layers.27.self_attn.v_proj.lora_E": [
|
2951 |
-
|
2952 |
true,
|
2953 |
false,
|
2954 |
false,
|
@@ -2981,11 +2981,11 @@
|
|
2981 |
true,
|
2982 |
true,
|
2983 |
true,
|
2984 |
-
true,
|
2985 |
-
true,
|
2986 |
false,
|
2987 |
true,
|
|
|
2988 |
true,
|
|
|
2989 |
true,
|
2990 |
true
|
2991 |
],
|
@@ -3046,7 +3046,7 @@
|
|
3046 |
true
|
3047 |
],
|
3048 |
"model.decoder.layers.28.self_attn.k_proj.lora_E": [
|
3049 |
-
|
3050 |
true,
|
3051 |
true,
|
3052 |
false,
|
@@ -3097,7 +3097,7 @@
|
|
3097 |
false,
|
3098 |
false,
|
3099 |
false,
|
3100 |
-
|
3101 |
false,
|
3102 |
true
|
3103 |
],
|
@@ -3122,7 +3122,7 @@
|
|
3122 |
false,
|
3123 |
false,
|
3124 |
false,
|
3125 |
-
|
3126 |
false,
|
3127 |
false,
|
3128 |
false,
|
@@ -3191,7 +3191,7 @@
|
|
3191 |
false,
|
3192 |
false,
|
3193 |
false,
|
3194 |
-
|
3195 |
true,
|
3196 |
false,
|
3197 |
false,
|
@@ -3210,7 +3210,7 @@
|
|
3210 |
false,
|
3211 |
false,
|
3212 |
false,
|
3213 |
-
|
3214 |
false
|
3215 |
],
|
3216 |
"model.decoder.layers.29.self_attn.q_proj.lora_E": [
|
@@ -3259,14 +3259,14 @@
|
|
3259 |
true,
|
3260 |
true,
|
3261 |
false,
|
3262 |
-
false,
|
3263 |
true,
|
3264 |
false,
|
|
|
3265 |
true,
|
3266 |
true,
|
3267 |
false,
|
3268 |
false,
|
3269 |
-
|
3270 |
false
|
3271 |
],
|
3272 |
"model.decoder.layers.3.encoder_attn.q_proj.lora_E": [
|
@@ -3368,7 +3368,6 @@
|
|
3368 |
false
|
3369 |
],
|
3370 |
"model.decoder.layers.3.self_attn.v_proj.lora_E": [
|
3371 |
-
true,
|
3372 |
false,
|
3373 |
false,
|
3374 |
false,
|
@@ -3377,8 +3376,9 @@
|
|
3377 |
false,
|
3378 |
false,
|
3379 |
false,
|
3380 |
-
true,
|
3381 |
false,
|
|
|
|
|
3382 |
false
|
3383 |
],
|
3384 |
"model.decoder.layers.30.encoder_attn.k_proj.lora_E": [
|
@@ -3404,7 +3404,7 @@
|
|
3404 |
false,
|
3405 |
false,
|
3406 |
false,
|
3407 |
-
|
3408 |
true,
|
3409 |
false,
|
3410 |
false
|
@@ -3432,7 +3432,7 @@
|
|
3432 |
true,
|
3433 |
true,
|
3434 |
false,
|
3435 |
-
|
3436 |
true,
|
3437 |
true,
|
3438 |
true
|
@@ -3475,7 +3475,7 @@
|
|
3475 |
false,
|
3476 |
true,
|
3477 |
false,
|
3478 |
-
|
3479 |
true,
|
3480 |
true
|
3481 |
],
|
@@ -3517,7 +3517,7 @@
|
|
3517 |
false,
|
3518 |
false,
|
3519 |
false,
|
3520 |
-
|
3521 |
false,
|
3522 |
true
|
3523 |
],
|
@@ -3584,7 +3584,7 @@
|
|
3584 |
false,
|
3585 |
false,
|
3586 |
false,
|
3587 |
-
|
3588 |
false,
|
3589 |
true,
|
3590 |
true,
|
@@ -3686,7 +3686,7 @@
|
|
3686 |
false,
|
3687 |
false,
|
3688 |
false,
|
3689 |
-
|
3690 |
false
|
3691 |
],
|
3692 |
"model.decoder.layers.4.encoder_attn.q_proj.lora_E": [
|
@@ -3705,7 +3705,7 @@
|
|
3705 |
],
|
3706 |
"model.decoder.layers.4.encoder_attn.v_proj.lora_E": [
|
3707 |
true,
|
3708 |
-
|
3709 |
false,
|
3710 |
false,
|
3711 |
false,
|
@@ -3714,7 +3714,7 @@
|
|
3714 |
true,
|
3715 |
false,
|
3716 |
true,
|
3717 |
-
|
3718 |
false
|
3719 |
],
|
3720 |
"model.decoder.layers.4.fc1.lora_E": [
|
@@ -3853,7 +3853,7 @@
|
|
3853 |
false,
|
3854 |
true,
|
3855 |
true,
|
3856 |
-
|
3857 |
false,
|
3858 |
true
|
3859 |
],
|
@@ -4069,7 +4069,7 @@
|
|
4069 |
],
|
4070 |
"model.decoder.layers.6.self_attn.v_proj.lora_E": [
|
4071 |
false,
|
4072 |
-
|
4073 |
false,
|
4074 |
false,
|
4075 |
false,
|
@@ -4079,7 +4079,7 @@
|
|
4079 |
false,
|
4080 |
true,
|
4081 |
false,
|
4082 |
-
|
4083 |
],
|
4084 |
"model.decoder.layers.7.encoder_attn.k_proj.lora_E": [
|
4085 |
false,
|
@@ -4149,7 +4149,7 @@
|
|
4149 |
true,
|
4150 |
true,
|
4151 |
true,
|
4152 |
-
|
4153 |
],
|
4154 |
"model.decoder.layers.7.fc2.lora_E": [
|
4155 |
true,
|
@@ -4428,7 +4428,7 @@
|
|
4428 |
true,
|
4429 |
true,
|
4430 |
true,
|
4431 |
-
|
4432 |
true
|
4433 |
],
|
4434 |
"model.decoder.layers.9.fc2.lora_E": [
|
@@ -4496,9 +4496,9 @@
|
|
4496 |
true,
|
4497 |
false,
|
4498 |
false,
|
|
|
4499 |
false,
|
4500 |
false,
|
4501 |
-
true,
|
4502 |
false
|
4503 |
],
|
4504 |
"model.encoder.layers.0.fc1.lora_E": [
|
@@ -4526,7 +4526,7 @@
|
|
4526 |
false,
|
4527 |
false,
|
4528 |
false,
|
4529 |
-
|
4530 |
false
|
4531 |
],
|
4532 |
"model.encoder.layers.0.self_attn.k_proj.lora_E": [
|
@@ -4550,8 +4550,8 @@
|
|
4550 |
true,
|
4551 |
false,
|
4552 |
false,
|
4553 |
-
|
4554 |
-
|
4555 |
false,
|
4556 |
false,
|
4557 |
false,
|
@@ -4772,13 +4772,13 @@
|
|
4772 |
false,
|
4773 |
true,
|
4774 |
true,
|
4775 |
-
true,
|
4776 |
false,
|
4777 |
false,
|
4778 |
false,
|
4779 |
-
true,
|
4780 |
false,
|
4781 |
true,
|
|
|
|
|
4782 |
false
|
4783 |
],
|
4784 |
"model.encoder.layers.11.self_attn.k_proj.lora_E": [
|
@@ -4832,7 +4832,7 @@
|
|
4832 |
false,
|
4833 |
false,
|
4834 |
false,
|
4835 |
-
|
4836 |
false,
|
4837 |
false,
|
4838 |
false
|
@@ -4885,7 +4885,7 @@
|
|
4885 |
false,
|
4886 |
false,
|
4887 |
false,
|
4888 |
-
|
4889 |
false,
|
4890 |
false,
|
4891 |
false,
|
@@ -4926,7 +4926,7 @@
|
|
4926 |
false,
|
4927 |
false,
|
4928 |
false,
|
4929 |
-
|
4930 |
true,
|
4931 |
false,
|
4932 |
false,
|
@@ -4943,7 +4943,7 @@
|
|
4943 |
true,
|
4944 |
false,
|
4945 |
false,
|
4946 |
-
|
4947 |
false,
|
4948 |
false,
|
4949 |
true,
|
@@ -4967,7 +4967,7 @@
|
|
4967 |
false,
|
4968 |
false,
|
4969 |
false,
|
4970 |
-
|
4971 |
false,
|
4972 |
false,
|
4973 |
false,
|
@@ -5003,7 +5003,7 @@
|
|
5003 |
false,
|
5004 |
false,
|
5005 |
false,
|
5006 |
-
|
5007 |
],
|
5008 |
"model.encoder.layers.14.fc1.lora_E": [
|
5009 |
true,
|
@@ -5016,7 +5016,7 @@
|
|
5016 |
true,
|
5017 |
false,
|
5018 |
false,
|
5019 |
-
|
5020 |
true
|
5021 |
],
|
5022 |
"model.encoder.layers.14.fc2.lora_E": [
|
@@ -5056,7 +5056,7 @@
|
|
5056 |
false,
|
5057 |
false,
|
5058 |
true,
|
5059 |
-
|
5060 |
false,
|
5061 |
true,
|
5062 |
false
|
@@ -5136,11 +5136,11 @@
|
|
5136 |
true,
|
5137 |
false,
|
5138 |
false,
|
5139 |
-
|
5140 |
true,
|
5141 |
false,
|
5142 |
false,
|
5143 |
-
|
5144 |
false,
|
5145 |
false,
|
5146 |
false
|
@@ -5168,7 +5168,7 @@
|
|
5168 |
false,
|
5169 |
false,
|
5170 |
true,
|
5171 |
-
|
5172 |
false,
|
5173 |
false,
|
5174 |
true
|
@@ -5226,7 +5226,7 @@
|
|
5226 |
false,
|
5227 |
false,
|
5228 |
false,
|
5229 |
-
|
5230 |
true
|
5231 |
],
|
5232 |
"model.encoder.layers.16.self_attn.q_proj.lora_E": [
|
@@ -5307,9 +5307,9 @@
|
|
5307 |
true,
|
5308 |
true,
|
5309 |
false,
|
5310 |
-
false,
|
5311 |
true,
|
5312 |
-
|
|
|
5313 |
true,
|
5314 |
true
|
5315 |
],
|
@@ -5391,7 +5391,7 @@
|
|
5391 |
false,
|
5392 |
false,
|
5393 |
false,
|
5394 |
-
|
5395 |
true,
|
5396 |
false,
|
5397 |
false,
|
@@ -5474,7 +5474,7 @@
|
|
5474 |
false,
|
5475 |
false,
|
5476 |
false,
|
5477 |
-
|
5478 |
false,
|
5479 |
false,
|
5480 |
false,
|
@@ -5729,7 +5729,7 @@
|
|
5729 |
false,
|
5730 |
false,
|
5731 |
true,
|
5732 |
-
|
5733 |
false,
|
5734 |
false
|
5735 |
],
|
@@ -5806,7 +5806,7 @@
|
|
5806 |
"model.encoder.layers.22.self_attn.out_proj.lora_E": [
|
5807 |
false,
|
5808 |
false,
|
5809 |
-
|
5810 |
false,
|
5811 |
false,
|
5812 |
true,
|
@@ -5893,7 +5893,7 @@
|
|
5893 |
true,
|
5894 |
true,
|
5895 |
false,
|
5896 |
-
|
5897 |
true,
|
5898 |
true,
|
5899 |
false,
|
@@ -5917,7 +5917,7 @@
|
|
5917 |
],
|
5918 |
"model.encoder.layers.23.self_attn.v_proj.lora_E": [
|
5919 |
true,
|
5920 |
-
|
5921 |
false,
|
5922 |
false,
|
5923 |
true,
|
@@ -6092,7 +6092,7 @@
|
|
6092 |
true,
|
6093 |
true,
|
6094 |
true,
|
6095 |
-
|
6096 |
true,
|
6097 |
true,
|
6098 |
true
|
@@ -6227,7 +6227,7 @@
|
|
6227 |
true,
|
6228 |
true,
|
6229 |
false,
|
6230 |
-
|
6231 |
false,
|
6232 |
true,
|
6233 |
true,
|
@@ -6400,7 +6400,7 @@
|
|
6400 |
true,
|
6401 |
true,
|
6402 |
true,
|
6403 |
-
|
6404 |
true,
|
6405 |
true,
|
6406 |
true
|
@@ -6425,7 +6425,7 @@
|
|
6425 |
true,
|
6426 |
true,
|
6427 |
true,
|
6428 |
-
|
6429 |
true,
|
6430 |
true,
|
6431 |
true,
|
@@ -6564,7 +6564,7 @@
|
|
6564 |
true,
|
6565 |
true,
|
6566 |
true,
|
6567 |
-
|
6568 |
true,
|
6569 |
true,
|
6570 |
true,
|
@@ -6649,8 +6649,8 @@
|
|
6649 |
false,
|
6650 |
false,
|
6651 |
true,
|
6652 |
-
false,
|
6653 |
true,
|
|
|
6654 |
true,
|
6655 |
true,
|
6656 |
false,
|
@@ -6677,13 +6677,13 @@
|
|
6677 |
true,
|
6678 |
true,
|
6679 |
false,
|
6680 |
-
|
6681 |
false,
|
6682 |
true,
|
6683 |
true,
|
6684 |
true,
|
6685 |
true,
|
6686 |
-
|
6687 |
],
|
6688 |
"model.encoder.layers.4.fc1.lora_E": [
|
6689 |
false,
|
@@ -6871,7 +6871,7 @@
|
|
6871 |
false,
|
6872 |
false,
|
6873 |
false,
|
6874 |
-
|
6875 |
false,
|
6876 |
false,
|
6877 |
false,
|
@@ -7040,7 +7040,7 @@
|
|
7040 |
false,
|
7041 |
false,
|
7042 |
false,
|
7043 |
-
|
7044 |
false,
|
7045 |
false,
|
7046 |
false,
|
@@ -7129,8 +7129,8 @@
|
|
7129 |
false,
|
7130 |
false,
|
7131 |
false,
|
7132 |
-
|
7133 |
-
|
7134 |
false
|
7135 |
],
|
7136 |
"model.encoder.layers.9.self_attn.k_proj.lora_E": [
|
|
|
73 |
false,
|
74 |
false,
|
75 |
false,
|
76 |
+
false,
|
77 |
true,
|
78 |
true
|
79 |
],
|
|
|
102 |
true,
|
103 |
true,
|
104 |
true,
|
105 |
+
false,
|
106 |
true
|
107 |
],
|
108 |
"model.decoder.layers.0.self_attn.k_proj.lora_E": [
|
|
|
215 |
false,
|
216 |
false,
|
217 |
false,
|
218 |
+
true
|
219 |
],
|
220 |
"model.decoder.layers.1.fc1.lora_E": [
|
221 |
true,
|
|
|
411 |
true,
|
412 |
false,
|
413 |
false,
|
414 |
+
false
|
415 |
],
|
416 |
"model.decoder.layers.10.self_attn.q_proj.lora_E": [
|
417 |
false,
|
|
|
504 |
true,
|
505 |
true,
|
506 |
true,
|
507 |
+
false,
|
508 |
true,
|
509 |
true,
|
510 |
true,
|
|
|
550 |
false,
|
551 |
false,
|
552 |
false,
|
553 |
+
false,
|
554 |
true
|
555 |
],
|
556 |
"model.decoder.layers.11.self_attn.q_proj.lora_E": [
|
|
|
574 |
false,
|
575 |
false,
|
576 |
false,
|
577 |
+
true,
|
578 |
false,
|
579 |
false,
|
580 |
false,
|
|
|
641 |
true,
|
642 |
true,
|
643 |
true,
|
644 |
+
false,
|
645 |
true,
|
646 |
true,
|
647 |
true,
|
|
|
666 |
true
|
667 |
],
|
668 |
"model.decoder.layers.12.self_attn.k_proj.lora_E": [
|
669 |
+
false,
|
670 |
false,
|
671 |
false,
|
672 |
false,
|
|
|
683 |
false,
|
684 |
false,
|
685 |
false,
|
686 |
+
true,
|
687 |
false,
|
688 |
false,
|
689 |
false,
|
690 |
false,
|
691 |
true,
|
692 |
false,
|
693 |
+
false,
|
694 |
false
|
695 |
],
|
696 |
"model.decoder.layers.12.self_attn.q_proj.lora_E": [
|
|
|
747 |
false,
|
748 |
false,
|
749 |
false,
|
750 |
+
true
|
751 |
],
|
752 |
"model.decoder.layers.13.encoder_attn.q_proj.lora_E": [
|
753 |
false,
|
|
|
770 |
false,
|
771 |
true,
|
772 |
true,
|
|
|
773 |
false,
|
774 |
+
true,
|
775 |
false,
|
776 |
true,
|
777 |
true,
|
|
|
823 |
false,
|
824 |
false,
|
825 |
true,
|
|
|
826 |
true,
|
827 |
false,
|
828 |
+
false,
|
829 |
true,
|
830 |
false,
|
831 |
true,
|
|
|
962 |
"model.decoder.layers.14.self_attn.out_proj.lora_E": [
|
963 |
false,
|
964 |
true,
|
965 |
+
false,
|
966 |
true,
|
967 |
false,
|
968 |
false,
|
|
|
1108 |
true,
|
1109 |
true,
|
1110 |
false,
|
1111 |
+
false,
|
1112 |
false,
|
1113 |
false,
|
1114 |
true
|
|
|
1135 |
true,
|
1136 |
false,
|
1137 |
false,
|
1138 |
+
false,
|
1139 |
+
false,
|
1140 |
true,
|
1141 |
true,
|
1142 |
false
|
|
|
1157 |
],
|
1158 |
"model.decoder.layers.16.encoder_attn.out_proj.lora_E": [
|
1159 |
false,
|
1160 |
+
false,
|
1161 |
false,
|
1162 |
false,
|
1163 |
false,
|
1164 |
true,
|
1165 |
+
false,
|
1166 |
false,
|
1167 |
false,
|
1168 |
false,
|
|
|
1181 |
false,
|
1182 |
false,
|
1183 |
false,
|
1184 |
+
true
|
1185 |
],
|
1186 |
"model.decoder.layers.16.encoder_attn.v_proj.lora_E": [
|
1187 |
true,
|
|
|
1221 |
true,
|
1222 |
true,
|
1223 |
true,
|
1224 |
+
false,
|
1225 |
true,
|
1226 |
true
|
1227 |
],
|
|
|
1272 |
true,
|
1273 |
true,
|
1274 |
true,
|
1275 |
+
false,
|
1276 |
true,
|
1277 |
true,
|
1278 |
true,
|
|
|
1296 |
false
|
1297 |
],
|
1298 |
"model.decoder.layers.17.encoder_attn.out_proj.lora_E": [
|
|
|
|
|
1299 |
true,
|
1300 |
false,
|
1301 |
+
true,
|
1302 |
false,
|
1303 |
false,
|
1304 |
false,
|
|
|
1306 |
false,
|
1307 |
false,
|
1308 |
false,
|
1309 |
+
true,
|
1310 |
false
|
1311 |
],
|
1312 |
"model.decoder.layers.17.encoder_attn.q_proj.lora_E": [
|
|
|
1329 |
true,
|
1330 |
true,
|
1331 |
true,
|
1332 |
+
false,
|
1333 |
true,
|
1334 |
true,
|
1335 |
true,
|
|
|
1375 |
false,
|
1376 |
false,
|
1377 |
false,
|
1378 |
+
true,
|
1379 |
false,
|
1380 |
false
|
1381 |
],
|
|
|
1461 |
false,
|
1462 |
false,
|
1463 |
false,
|
1464 |
+
true
|
1465 |
],
|
1466 |
"model.decoder.layers.18.encoder_attn.v_proj.lora_E": [
|
1467 |
true,
|
|
|
1471 |
true,
|
1472 |
true,
|
1473 |
true,
|
1474 |
+
false,
|
1475 |
true,
|
1476 |
true,
|
1477 |
true,
|
|
|
1503 |
true,
|
1504 |
true,
|
1505 |
false,
|
1506 |
+
false
|
1507 |
],
|
1508 |
"model.decoder.layers.18.self_attn.k_proj.lora_E": [
|
1509 |
false,
|
|
|
1521 |
],
|
1522 |
"model.decoder.layers.18.self_attn.out_proj.lora_E": [
|
1523 |
true,
|
1524 |
+
true,
|
1525 |
true,
|
1526 |
false,
|
1527 |
true,
|
|
|
1552 |
false,
|
1553 |
true,
|
1554 |
false,
|
1555 |
+
false,
|
1556 |
true,
|
1557 |
false,
|
1558 |
false,
|
|
|
1610 |
false,
|
1611 |
false,
|
1612 |
false,
|
1613 |
+
false,
|
1614 |
false,
|
1615 |
false,
|
1616 |
false,
|
|
|
1656 |
false,
|
1657 |
false,
|
1658 |
false,
|
1659 |
+
false,
|
1660 |
false
|
1661 |
],
|
1662 |
"model.decoder.layers.19.self_attn.out_proj.lora_E": [
|
1663 |
true,
|
1664 |
false,
|
1665 |
false,
|
|
|
1666 |
false,
|
|
|
1667 |
false,
|
1668 |
+
true,
|
1669 |
false,
|
1670 |
false,
|
1671 |
true,
|
1672 |
+
true,
|
1673 |
false,
|
1674 |
false
|
1675 |
],
|
|
|
1726 |
false,
|
1727 |
false,
|
1728 |
false,
|
1729 |
+
true,
|
1730 |
false
|
1731 |
],
|
1732 |
"model.decoder.layers.2.encoder_attn.q_proj.lora_E": [
|
|
|
1810 |
false,
|
1811 |
true,
|
1812 |
true,
|
1813 |
+
false,
|
1814 |
false
|
1815 |
],
|
1816 |
"model.decoder.layers.2.self_attn.q_proj.lora_E": [
|
|
|
1861 |
false,
|
1862 |
true,
|
1863 |
true,
|
|
|
1864 |
true,
|
1865 |
+
true,
|
1866 |
+
true,
|
1867 |
true,
|
1868 |
true,
|
1869 |
true,
|
|
|
1922 |
true,
|
1923 |
true,
|
1924 |
true,
|
1925 |
+
true,
|
1926 |
true
|
1927 |
],
|
1928 |
"model.decoder.layers.20.self_attn.k_proj.lora_E": [
|
1929 |
+
false,
|
1930 |
true,
|
1931 |
true,
|
1932 |
true,
|
|
|
2136 |
false
|
2137 |
],
|
2138 |
"model.decoder.layers.22.encoder_attn.out_proj.lora_E": [
|
2139 |
+
true,
|
2140 |
true,
|
2141 |
false,
|
2142 |
true,
|
|
|
2221 |
],
|
2222 |
"model.decoder.layers.22.self_attn.out_proj.lora_E": [
|
2223 |
false,
|
|
|
2224 |
false,
|
|
|
2225 |
false,
|
2226 |
true,
|
2227 |
false,
|
2228 |
+
false,
|
2229 |
true,
|
2230 |
+
false,
|
2231 |
true,
|
2232 |
false,
|
2233 |
true,
|
|
|
2258 |
true,
|
2259 |
true,
|
2260 |
false,
|
2261 |
+
true,
|
2262 |
+
false
|
2263 |
],
|
2264 |
"model.decoder.layers.23.encoder_attn.k_proj.lora_E": [
|
2265 |
false,
|
|
|
2349 |
false,
|
2350 |
false,
|
2351 |
false,
|
2352 |
+
false,
|
2353 |
true,
|
2354 |
false,
|
2355 |
false,
|
2356 |
true,
|
2357 |
false,
|
2358 |
+
false,
|
2359 |
false,
|
2360 |
true
|
2361 |
],
|
|
|
2364 |
true,
|
2365 |
true,
|
2366 |
true,
|
2367 |
+
false,
|
2368 |
false,
|
2369 |
false,
|
2370 |
true,
|
|
|
2395 |
true,
|
2396 |
false,
|
2397 |
true,
|
2398 |
+
false,
|
|
|
2399 |
true,
|
2400 |
false,
|
2401 |
+
false,
|
2402 |
true
|
2403 |
],
|
2404 |
"model.decoder.layers.24.encoder_attn.k_proj.lora_E": [
|
|
|
2483 |
true,
|
2484 |
false,
|
2485 |
false,
|
2486 |
+
false
|
2487 |
],
|
2488 |
"model.decoder.layers.24.self_attn.k_proj.lora_E": [
|
2489 |
true,
|
|
|
2492 |
true,
|
2493 |
true,
|
2494 |
true,
|
|
|
2495 |
true,
|
2496 |
true,
|
2497 |
true,
|
2498 |
true,
|
2499 |
+
true,
|
2500 |
+
true
|
2501 |
],
|
2502 |
"model.decoder.layers.24.self_attn.out_proj.lora_E": [
|
2503 |
true,
|
|
|
2558 |
"model.decoder.layers.25.encoder_attn.out_proj.lora_E": [
|
2559 |
true,
|
2560 |
true,
|
2561 |
+
true,
|
2562 |
true,
|
2563 |
false,
|
2564 |
false,
|
2565 |
true,
|
2566 |
+
false,
|
2567 |
true,
|
2568 |
true,
|
2569 |
true,
|
|
|
2621 |
true,
|
2622 |
true,
|
2623 |
true,
|
2624 |
+
false,
|
2625 |
true,
|
2626 |
true
|
2627 |
],
|
2628 |
"model.decoder.layers.25.self_attn.k_proj.lora_E": [
|
2629 |
true,
|
2630 |
+
true,
|
2631 |
true,
|
2632 |
true,
|
2633 |
false,
|
2634 |
true,
|
2635 |
true,
|
2636 |
+
true,
|
2637 |
+
true,
|
2638 |
true,
|
2639 |
true,
|
2640 |
false
|
|
|
2706 |
true,
|
2707 |
true,
|
2708 |
true,
|
2709 |
+
true,
|
2710 |
true
|
2711 |
],
|
2712 |
"model.decoder.layers.26.encoder_attn.q_proj.lora_E": [
|
|
|
2766 |
false
|
2767 |
],
|
2768 |
"model.decoder.layers.26.self_attn.k_proj.lora_E": [
|
2769 |
+
true,
|
2770 |
true,
|
2771 |
false,
|
2772 |
true,
|
|
|
2782 |
"model.decoder.layers.26.self_attn.out_proj.lora_E": [
|
2783 |
false,
|
2784 |
false,
|
2785 |
+
false,
|
2786 |
true,
|
2787 |
false,
|
2788 |
false,
|
|
|
2796 |
"model.decoder.layers.26.self_attn.q_proj.lora_E": [
|
2797 |
false,
|
2798 |
false,
|
2799 |
+
true,
|
2800 |
false,
|
2801 |
false,
|
2802 |
false,
|
|
|
2811 |
false,
|
2812 |
true,
|
2813 |
false,
|
2814 |
+
true,
|
2815 |
true,
|
2816 |
false,
|
2817 |
false,
|
2818 |
+
false,
|
2819 |
true,
|
2820 |
true,
|
2821 |
false,
|
|
|
2921 |
],
|
2922 |
"model.decoder.layers.27.self_attn.out_proj.lora_E": [
|
2923 |
false,
|
2924 |
+
false,
|
2925 |
true,
|
2926 |
false,
|
2927 |
false,
|
|
|
2948 |
false
|
2949 |
],
|
2950 |
"model.decoder.layers.27.self_attn.v_proj.lora_E": [
|
2951 |
+
true,
|
2952 |
true,
|
2953 |
false,
|
2954 |
false,
|
|
|
2981 |
true,
|
2982 |
true,
|
2983 |
true,
|
|
|
|
|
2984 |
false,
|
2985 |
true,
|
2986 |
+
false,
|
2987 |
true,
|
2988 |
+
false,
|
2989 |
true,
|
2990 |
true
|
2991 |
],
|
|
|
3046 |
true
|
3047 |
],
|
3048 |
"model.decoder.layers.28.self_attn.k_proj.lora_E": [
|
3049 |
+
false,
|
3050 |
true,
|
3051 |
true,
|
3052 |
false,
|
|
|
3097 |
false,
|
3098 |
false,
|
3099 |
false,
|
3100 |
+
true,
|
3101 |
false,
|
3102 |
true
|
3103 |
],
|
|
|
3122 |
false,
|
3123 |
false,
|
3124 |
false,
|
3125 |
+
true,
|
3126 |
false,
|
3127 |
false,
|
3128 |
false,
|
|
|
3191 |
false,
|
3192 |
false,
|
3193 |
false,
|
3194 |
+
true,
|
3195 |
true,
|
3196 |
false,
|
3197 |
false,
|
|
|
3210 |
false,
|
3211 |
false,
|
3212 |
false,
|
3213 |
+
true,
|
3214 |
false
|
3215 |
],
|
3216 |
"model.decoder.layers.29.self_attn.q_proj.lora_E": [
|
|
|
3259 |
true,
|
3260 |
true,
|
3261 |
false,
|
|
|
3262 |
true,
|
3263 |
false,
|
3264 |
+
false,
|
3265 |
true,
|
3266 |
true,
|
3267 |
false,
|
3268 |
false,
|
3269 |
+
false,
|
3270 |
false
|
3271 |
],
|
3272 |
"model.decoder.layers.3.encoder_attn.q_proj.lora_E": [
|
|
|
3368 |
false
|
3369 |
],
|
3370 |
"model.decoder.layers.3.self_attn.v_proj.lora_E": [
|
|
|
3371 |
false,
|
3372 |
false,
|
3373 |
false,
|
|
|
3376 |
false,
|
3377 |
false,
|
3378 |
false,
|
|
|
3379 |
false,
|
3380 |
+
true,
|
3381 |
+
true,
|
3382 |
false
|
3383 |
],
|
3384 |
"model.decoder.layers.30.encoder_attn.k_proj.lora_E": [
|
|
|
3404 |
false,
|
3405 |
false,
|
3406 |
false,
|
3407 |
+
true,
|
3408 |
true,
|
3409 |
false,
|
3410 |
false
|
|
|
3432 |
true,
|
3433 |
true,
|
3434 |
false,
|
3435 |
+
false,
|
3436 |
true,
|
3437 |
true,
|
3438 |
true
|
|
|
3475 |
false,
|
3476 |
true,
|
3477 |
false,
|
3478 |
+
false,
|
3479 |
true,
|
3480 |
true
|
3481 |
],
|
|
|
3517 |
false,
|
3518 |
false,
|
3519 |
false,
|
3520 |
+
true,
|
3521 |
false,
|
3522 |
true
|
3523 |
],
|
|
|
3584 |
false,
|
3585 |
false,
|
3586 |
false,
|
3587 |
+
true,
|
3588 |
false,
|
3589 |
true,
|
3590 |
true,
|
|
|
3686 |
false,
|
3687 |
false,
|
3688 |
false,
|
3689 |
+
true,
|
3690 |
false
|
3691 |
],
|
3692 |
"model.decoder.layers.4.encoder_attn.q_proj.lora_E": [
|
|
|
3705 |
],
|
3706 |
"model.decoder.layers.4.encoder_attn.v_proj.lora_E": [
|
3707 |
true,
|
3708 |
+
true,
|
3709 |
false,
|
3710 |
false,
|
3711 |
false,
|
|
|
3714 |
true,
|
3715 |
false,
|
3716 |
true,
|
3717 |
+
true,
|
3718 |
false
|
3719 |
],
|
3720 |
"model.decoder.layers.4.fc1.lora_E": [
|
|
|
3853 |
false,
|
3854 |
true,
|
3855 |
true,
|
3856 |
+
true,
|
3857 |
false,
|
3858 |
true
|
3859 |
],
|
|
|
4069 |
],
|
4070 |
"model.decoder.layers.6.self_attn.v_proj.lora_E": [
|
4071 |
false,
|
4072 |
+
false,
|
4073 |
false,
|
4074 |
false,
|
4075 |
false,
|
|
|
4079 |
false,
|
4080 |
true,
|
4081 |
false,
|
4082 |
+
true
|
4083 |
],
|
4084 |
"model.decoder.layers.7.encoder_attn.k_proj.lora_E": [
|
4085 |
false,
|
|
|
4149 |
true,
|
4150 |
true,
|
4151 |
true,
|
4152 |
+
true
|
4153 |
],
|
4154 |
"model.decoder.layers.7.fc2.lora_E": [
|
4155 |
true,
|
|
|
4428 |
true,
|
4429 |
true,
|
4430 |
true,
|
4431 |
+
false,
|
4432 |
true
|
4433 |
],
|
4434 |
"model.decoder.layers.9.fc2.lora_E": [
|
|
|
4496 |
true,
|
4497 |
false,
|
4498 |
false,
|
4499 |
+
true,
|
4500 |
false,
|
4501 |
false,
|
|
|
4502 |
false
|
4503 |
],
|
4504 |
"model.encoder.layers.0.fc1.lora_E": [
|
|
|
4526 |
false,
|
4527 |
false,
|
4528 |
false,
|
4529 |
+
false,
|
4530 |
false
|
4531 |
],
|
4532 |
"model.encoder.layers.0.self_attn.k_proj.lora_E": [
|
|
|
4550 |
true,
|
4551 |
false,
|
4552 |
false,
|
4553 |
+
true,
|
4554 |
+
true,
|
4555 |
false,
|
4556 |
false,
|
4557 |
false,
|
|
|
4772 |
false,
|
4773 |
true,
|
4774 |
true,
|
|
|
4775 |
false,
|
4776 |
false,
|
4777 |
false,
|
|
|
4778 |
false,
|
4779 |
true,
|
4780 |
+
false,
|
4781 |
+
false,
|
4782 |
false
|
4783 |
],
|
4784 |
"model.encoder.layers.11.self_attn.k_proj.lora_E": [
|
|
|
4832 |
false,
|
4833 |
false,
|
4834 |
false,
|
4835 |
+
false,
|
4836 |
false,
|
4837 |
false,
|
4838 |
false
|
|
|
4885 |
false,
|
4886 |
false,
|
4887 |
false,
|
4888 |
+
false,
|
4889 |
false,
|
4890 |
false,
|
4891 |
false,
|
|
|
4926 |
false,
|
4927 |
false,
|
4928 |
false,
|
4929 |
+
false,
|
4930 |
true,
|
4931 |
false,
|
4932 |
false,
|
|
|
4943 |
true,
|
4944 |
false,
|
4945 |
false,
|
4946 |
+
false,
|
4947 |
false,
|
4948 |
false,
|
4949 |
true,
|
|
|
4967 |
false,
|
4968 |
false,
|
4969 |
false,
|
4970 |
+
false,
|
4971 |
false,
|
4972 |
false,
|
4973 |
false,
|
|
|
5003 |
false,
|
5004 |
false,
|
5005 |
false,
|
5006 |
+
true
|
5007 |
],
|
5008 |
"model.encoder.layers.14.fc1.lora_E": [
|
5009 |
true,
|
|
|
5016 |
true,
|
5017 |
false,
|
5018 |
false,
|
5019 |
+
true,
|
5020 |
true
|
5021 |
],
|
5022 |
"model.encoder.layers.14.fc2.lora_E": [
|
|
|
5056 |
false,
|
5057 |
false,
|
5058 |
true,
|
5059 |
+
true,
|
5060 |
false,
|
5061 |
true,
|
5062 |
false
|
|
|
5136 |
true,
|
5137 |
false,
|
5138 |
false,
|
5139 |
+
true,
|
5140 |
true,
|
5141 |
false,
|
5142 |
false,
|
5143 |
+
true,
|
5144 |
false,
|
5145 |
false,
|
5146 |
false
|
|
|
5168 |
false,
|
5169 |
false,
|
5170 |
true,
|
5171 |
+
true,
|
5172 |
false,
|
5173 |
false,
|
5174 |
true
|
|
|
5226 |
false,
|
5227 |
false,
|
5228 |
false,
|
5229 |
+
false,
|
5230 |
true
|
5231 |
],
|
5232 |
"model.encoder.layers.16.self_attn.q_proj.lora_E": [
|
|
|
5307 |
true,
|
5308 |
true,
|
5309 |
false,
|
|
|
5310 |
true,
|
5311 |
+
true,
|
5312 |
+
true,
|
5313 |
true,
|
5314 |
true
|
5315 |
],
|
|
|
5391 |
false,
|
5392 |
false,
|
5393 |
false,
|
5394 |
+
true,
|
5395 |
true,
|
5396 |
false,
|
5397 |
false,
|
|
|
5474 |
false,
|
5475 |
false,
|
5476 |
false,
|
5477 |
+
false,
|
5478 |
false,
|
5479 |
false,
|
5480 |
false,
|
|
|
5729 |
false,
|
5730 |
false,
|
5731 |
true,
|
5732 |
+
false,
|
5733 |
false,
|
5734 |
false
|
5735 |
],
|
|
|
5806 |
"model.encoder.layers.22.self_attn.out_proj.lora_E": [
|
5807 |
false,
|
5808 |
false,
|
5809 |
+
true,
|
5810 |
false,
|
5811 |
false,
|
5812 |
true,
|
|
|
5893 |
true,
|
5894 |
true,
|
5895 |
false,
|
5896 |
+
true,
|
5897 |
true,
|
5898 |
true,
|
5899 |
false,
|
|
|
5917 |
],
|
5918 |
"model.encoder.layers.23.self_attn.v_proj.lora_E": [
|
5919 |
true,
|
5920 |
+
true,
|
5921 |
false,
|
5922 |
false,
|
5923 |
true,
|
|
|
6092 |
true,
|
6093 |
true,
|
6094 |
true,
|
6095 |
+
true,
|
6096 |
true,
|
6097 |
true,
|
6098 |
true
|
|
|
6227 |
true,
|
6228 |
true,
|
6229 |
false,
|
6230 |
+
false,
|
6231 |
false,
|
6232 |
true,
|
6233 |
true,
|
|
|
6400 |
true,
|
6401 |
true,
|
6402 |
true,
|
6403 |
+
true,
|
6404 |
true,
|
6405 |
true,
|
6406 |
true
|
|
|
6425 |
true,
|
6426 |
true,
|
6427 |
true,
|
6428 |
+
true,
|
6429 |
true,
|
6430 |
true,
|
6431 |
true,
|
|
|
6564 |
true,
|
6565 |
true,
|
6566 |
true,
|
6567 |
+
false,
|
6568 |
true,
|
6569 |
true,
|
6570 |
true,
|
|
|
6649 |
false,
|
6650 |
false,
|
6651 |
true,
|
|
|
6652 |
true,
|
6653 |
+
false,
|
6654 |
true,
|
6655 |
true,
|
6656 |
false,
|
|
|
6677 |
true,
|
6678 |
true,
|
6679 |
false,
|
6680 |
+
true,
|
6681 |
false,
|
6682 |
true,
|
6683 |
true,
|
6684 |
true,
|
6685 |
true,
|
6686 |
+
false
|
6687 |
],
|
6688 |
"model.encoder.layers.4.fc1.lora_E": [
|
6689 |
false,
|
|
|
6871 |
false,
|
6872 |
false,
|
6873 |
false,
|
6874 |
+
false,
|
6875 |
false,
|
6876 |
false,
|
6877 |
false,
|
|
|
7040 |
false,
|
7041 |
false,
|
7042 |
false,
|
7043 |
+
true,
|
7044 |
false,
|
7045 |
false,
|
7046 |
false,
|
|
|
7129 |
false,
|
7130 |
false,
|
7131 |
false,
|
7132 |
+
true,
|
7133 |
+
true,
|
7134 |
false
|
7135 |
],
|
7136 |
"model.encoder.layers.9.self_attn.k_proj.lora_E": [
|
best_checkpoint/adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a4cdee2beed512b551653d291398294f0d644c8e3271f990e283a251dcdf98f
|
3 |
+
size 38590501
|
best_checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 174313245
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89f55dd801640b071badb2dfe8a3bf36c3a5a4d27e0a1f58cba3cf766b189fe1
|
3 |
size 174313245
|
best_checkpoint/random_states_0.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15691
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:deee05d4c67b9f221c3601194f47b65ccdfa7ddc85f5c16a149c0a2610982803
|
3 |
size 15691
|
best_checkpoint/random_states_1.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15755
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b436eb364998151b462d999d81fa96e8f9849666f0e8d939c855ef4aef9bdb8c
|
3 |
size 15755
|
best_checkpoint/scheduler.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 563
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a27be7d8ebde5aeca12cdbf3aabadc57e7115c3ca71c0bcd471a3f7be3d65e14
|
3 |
size 563
|