Commit 86c7ad8 by IliyanGochev (parent: 572e560)
Training in progress epoch 3

Changed files:
- README.md +13 -0
- Whisper PEFT Fine-Tuning/events.out.tfevents.1696436338.MLbox.300106.0 +2 -2
- adapter_config.json +93 -93
- adapter_model.bin +2 -2
- best_checkpoint/README.md +91 -0
- best_checkpoint/adapter_config.json +178 -178
- best_checkpoint/adapter_model.bin +2 -2
- best_checkpoint/optimizer.bin +1 -1
- best_checkpoint/random_states_0.pkl +1 -1
- best_checkpoint/random_states_1.pkl +1 -1
- best_checkpoint/scheduler.bin +1 -1
README.md
CHANGED
@@ -54,6 +54,18 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_use_double_quant: False
 - bnb_4bit_compute_dtype: float32
 
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: True
+- load_in_4bit: False
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: fp4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float32
+
 The following `bitsandbytes` quantization config was used during training:
 - quant_method: bitsandbytes
 - load_in_8bit: True
@@ -84,6 +96,7 @@ The following `bitsandbytes` quantization config was used during training:
 - PEFT 0.5.0
 - PEFT 0.5.0
 - PEFT 0.5.0
+- PEFT 0.5.0
 
 - PEFT 0.5.0.dev0
 The following `bitsandbytes` quantization config was used during training:
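The block that keeps getting appended to the README lists the `bitsandbytes` settings recorded at each save. As a rough, non-authoritative sketch, the same values map onto a `transformers.BitsAndBytesConfig` as below; the base model id is a placeholder, since this commit does not name it.

```python
# Sketch only: rebuilding the logged bitsandbytes settings as a
# transformers.BitsAndBytesConfig. Values mirror the README block above;
# requires the bitsandbytes package and a CUDA device to actually load.
import torch
from transformers import BitsAndBytesConfig, WhisperForConditionalGeneration

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,                       # - load_in_8bit: True
    load_in_4bit=False,                      # - load_in_4bit: False
    llm_int8_threshold=6.0,                  # - llm_int8_threshold: 6.0
    llm_int8_skip_modules=None,              # - llm_int8_skip_modules: None
    llm_int8_enable_fp32_cpu_offload=False,  # - llm_int8_enable_fp32_cpu_offload: False
    llm_int8_has_fp16_weight=False,          # - llm_int8_has_fp16_weight: False
    bnb_4bit_quant_type="fp4",               # - bnb_4bit_quant_type: fp4
    bnb_4bit_use_double_quant=False,         # - bnb_4bit_use_double_quant: False
    bnb_4bit_compute_dtype=torch.float32,    # - bnb_4bit_compute_dtype: float32
)

# Placeholder base model id; the README excerpt above does not state which
# Whisper checkpoint was fine-tuned.
model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2", quantization_config=bnb_config
)
```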
Whisper PEFT Fine-Tuning/events.out.tfevents.1696436338.MLbox.300106.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b344924cb79c25b3e962e7d68421e413fa83e4d68b8e84f6481d5e17e53a6ffb
+size 4069
adapter_config.json
CHANGED
The diff (+93 -93) is confined to the per-module boolean rank masks stored under keys ending in ".lora_E" (for example "model.decoder.layers.1.fc1.lora_E", "model.decoder.layers.16.encoder_attn.out_proj.lora_E", "model.encoder.layers.18.self_attn.out_proj.lora_E"). Across several dozen hunks, individual true/false entries in these arrays are toggled, with most of the changed entries becoming false in the new revision; the keys themselves and the overall structure of the file are unchanged.
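The ".lora_E" suffix suggests these masks come from PEFT's AdaLoRA-style rank allocation, which records one boolean per rank for every adapted projection; that reading is an assumption, since the commit itself does not say so. A minimal standard-library sketch for counting how many mask entries remain enabled in a local clone of the repository:

```python
# Minimal sketch (standard library only): count how many entries in each
# "*.lora_E" boolean mask remain True after this commit. The nesting of the
# masks inside the JSON is not assumed; we simply search for matching keys.
import json

def collect_lora_e_masks(node, out=None):
    """Recursively gather every list stored under a key ending in '.lora_E'."""
    if out is None:
        out = {}
    if isinstance(node, dict):
        for key, value in node.items():
            if key.endswith(".lora_E") and isinstance(value, list):
                out[key] = value
            else:
                collect_lora_e_masks(value, out)
    elif isinstance(node, list):
        for item in node:
            collect_lora_e_masks(item, out)
    return out

with open("adapter_config.json") as f:  # path inside a local clone
    masks = collect_lora_e_masks(json.load(f))

total = sum(len(m) for m in masks.values())
active = sum(sum(bool(x) for x in m) for m in masks.values())
print(f"{len(masks)} masked modules, {active}/{total} entries still enabled")
```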
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:067f61cbd367196dd55812c979bcc90a913f1a57880157f22943d127e4c40ecd
+size 38697637
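Both the event log and the adapter weights are stored through Git LFS, so the hunks above replace only the pointer (oid and size), not the binary contents. A small sketch, assuming the real adapter_model.bin has already been pulled locally, for checking it against the new pointer's digest:

```python
# Sketch: verify a locally downloaded adapter_model.bin against the sha256
# recorded in the LFS pointer above. Standard library only; the expected
# digest is copied from this commit's pointer file.
import hashlib

EXPECTED = "067f61cbd367196dd55812c979bcc90a913f1a57880157f22943d127e4c40ecd"

h = hashlib.sha256()
with open("adapter_model.bin", "rb") as f:          # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)

print("match" if h.hexdigest() == EXPECTED else "mismatch", h.hexdigest())
```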
best_checkpoint/README.md
CHANGED
@@ -268,6 +268,90 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_use_double_quant: False
 - bnb_4bit_compute_dtype: float32
 
Seven identical copies of the block below are appended here (new lines 271-354):
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: True
+- load_in_4bit: False
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: fp4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float32
+
 The following `bitsandbytes` quantization config was used during training:
 - quant_method: bitsandbytes
 - load_in_8bit: True
@@ -303,5 +387,12 @@ The following `bitsandbytes` quantization config was used during training:
 - PEFT 0.5.0
 - PEFT 0.5.0
 - PEFT 0.5.0
+- PEFT 0.5.0
+- PEFT 0.5.0
+- PEFT 0.5.0
+- PEFT 0.5.0
+- PEFT 0.5.0
+- PEFT 0.5.0
+- PEFT 0.5.0
 
 - PEFT 0.5.0
best_checkpoint/adapter_config.json
CHANGED
As with adapter_config.json above, the diff (+178 -178) is confined to the per-module boolean rank masks stored under keys ending in ".lora_E" (for example "model.decoder.layers.0.encoder_attn.out_proj.lora_E", "model.decoder.layers.15.self_attn.v_proj.lora_E", "model.encoder.layers.22.self_attn.out_proj.lora_E"). Individual true/false entries are toggled throughout the encoder and decoder layer masks; the structure of the file is otherwise unchanged.
false,
|
|
|
2347 |
],
|
2348 |
"model.decoder.layers.23.self_attn.k_proj.lora_E": [
|
2349 |
false,
|
|
|
2350 |
false,
|
2351 |
false,
|
2352 |
true,
|
2353 |
+
true,
|
2354 |
false,
|
2355 |
false,
|
2356 |
true,
|
2357 |
false,
|
2358 |
+
true,
|
2359 |
false,
|
2360 |
true
|
2361 |
],
|
|
|
2364 |
true,
|
2365 |
true,
|
2366 |
true,
|
2367 |
+
true,
|
2368 |
false,
|
2369 |
false,
|
2370 |
true,
|
|
|
2395 |
true,
|
2396 |
false,
|
2397 |
true,
|
|
|
2398 |
true,
|
2399 |
+
true,
|
2400 |
+
true,
|
2401 |
false,
|
2402 |
true
|
2403 |
],
|
|
|
2481 |
true,
|
2482 |
true,
|
2483 |
true,
|
|
|
2484 |
false,
|
2485 |
+
false,
|
2486 |
+
true
|
2487 |
],
|
2488 |
"model.decoder.layers.24.self_attn.k_proj.lora_E": [
|
2489 |
true,
|
|
|
2492 |
true,
|
2493 |
true,
|
2494 |
true,
|
2495 |
+
false,
|
2496 |
true,
|
2497 |
true,
|
2498 |
true,
|
2499 |
true,
|
2500 |
+
false
|
|
|
2501 |
],
|
2502 |
"model.decoder.layers.24.self_attn.out_proj.lora_E": [
|
2503 |
true,
|
|
|
2558 |
"model.decoder.layers.25.encoder_attn.out_proj.lora_E": [
|
2559 |
true,
|
2560 |
true,
|
2561 |
+
false,
|
|
|
2562 |
true,
|
2563 |
false,
|
2564 |
+
false,
|
2565 |
true,
|
2566 |
true,
|
2567 |
true,
|
|
|
2626 |
true
|
2627 |
],
|
2628 |
"model.decoder.layers.25.self_attn.k_proj.lora_E": [
|
|
|
|
|
|
|
2629 |
true,
|
2630 |
false,
|
2631 |
true,
|
2632 |
true,
|
2633 |
+
false,
|
2634 |
true,
|
2635 |
true,
|
2636 |
+
false,
|
2637 |
+
false,
|
2638 |
true,
|
2639 |
true,
|
2640 |
false
|
|
|
2649 |
false,
|
2650 |
false,
|
2651 |
true,
|
2652 |
+
false,
|
2653 |
false,
|
2654 |
false
|
2655 |
],
|
|
|
2698 |
"model.decoder.layers.26.encoder_attn.out_proj.lora_E": [
|
2699 |
true,
|
2700 |
true,
|
2701 |
+
false,
|
2702 |
true,
|
2703 |
true,
|
2704 |
true,
|
|
|
2706 |
true,
|
2707 |
true,
|
2708 |
true,
|
2709 |
+
false,
|
2710 |
true
|
2711 |
],
|
2712 |
"model.decoder.layers.26.encoder_attn.q_proj.lora_E": [
|
|
|
2766 |
false
|
2767 |
],
|
2768 |
"model.decoder.layers.26.self_attn.k_proj.lora_E": [
|
2769 |
+
false,
|
2770 |
true,
|
2771 |
false,
|
2772 |
true,
|
|
|
2774 |
false,
|
2775 |
false,
|
2776 |
false,
|
2777 |
+
false,
|
2778 |
true,
|
2779 |
false,
|
2780 |
false
|
2781 |
],
|
2782 |
"model.decoder.layers.26.self_attn.out_proj.lora_E": [
|
|
|
2783 |
false,
|
2784 |
false,
|
2785 |
true,
|
|
|
2786 |
true,
|
2787 |
+
false,
|
2788 |
+
false,
|
2789 |
true,
|
2790 |
false,
|
2791 |
false,
|
|
|
2794 |
true
|
2795 |
],
|
2796 |
"model.decoder.layers.26.self_attn.q_proj.lora_E": [
|
|
|
2797 |
false,
|
|
|
2798 |
false,
|
2799 |
false,
|
2800 |
false,
|
|
|
2803 |
false,
|
2804 |
false,
|
2805 |
false,
|
2806 |
+
false,
|
2807 |
+
false,
|
2808 |
+
false
|
2809 |
],
|
2810 |
"model.decoder.layers.26.self_attn.v_proj.lora_E": [
|
2811 |
false,
|
2812 |
true,
|
2813 |
false,
|
|
|
|
|
2814 |
false,
|
2815 |
+
true,
|
2816 |
false,
|
2817 |
false,
|
2818 |
true,
|
2819 |
true,
|
2820 |
+
true,
|
2821 |
false,
|
2822 |
true
|
2823 |
],
|
|
|
2845 |
true,
|
2846 |
true,
|
2847 |
true,
|
2848 |
+
false,
|
2849 |
false,
|
2850 |
true
|
2851 |
],
|
|
|
2907 |
],
|
2908 |
"model.decoder.layers.27.self_attn.k_proj.lora_E": [
|
2909 |
true,
|
2910 |
+
false,
|
2911 |
true,
|
2912 |
false,
|
2913 |
false,
|
|
|
2948 |
false
|
2949 |
],
|
2950 |
"model.decoder.layers.27.self_attn.v_proj.lora_E": [
|
2951 |
+
false,
|
2952 |
true,
|
2953 |
false,
|
2954 |
false,
|
|
|
2977 |
],
|
2978 |
"model.decoder.layers.28.encoder_attn.out_proj.lora_E": [
|
2979 |
true,
|
2980 |
+
false,
|
2981 |
true,
|
2982 |
true,
|
2983 |
true,
|
2984 |
true,
|
|
|
|
|
2985 |
true,
|
2986 |
+
false,
|
2987 |
true,
|
2988 |
true,
|
2989 |
true,
|
|
|
3041 |
true,
|
3042 |
true,
|
3043 |
true,
|
3044 |
+
false,
|
3045 |
true,
|
3046 |
true
|
3047 |
],
|
3048 |
"model.decoder.layers.28.self_attn.k_proj.lora_E": [
|
3049 |
+
true,
|
3050 |
true,
|
3051 |
true,
|
3052 |
false,
|
|
|
3068 |
true,
|
3069 |
true,
|
3070 |
false,
|
3071 |
+
false,
|
3072 |
false,
|
3073 |
false,
|
3074 |
false
|
|
|
3097 |
false,
|
3098 |
false,
|
3099 |
false,
|
3100 |
+
false,
|
3101 |
false,
|
3102 |
true
|
3103 |
],
|
|
|
3116 |
false
|
3117 |
],
|
3118 |
"model.decoder.layers.29.encoder_attn.out_proj.lora_E": [
|
|
|
|
|
3119 |
true,
|
3120 |
false,
|
3121 |
false,
|
3122 |
false,
|
|
|
3123 |
false,
|
3124 |
false,
|
3125 |
false,
|
3126 |
false,
|
3127 |
+
false,
|
3128 |
+
false,
|
3129 |
+
false,
|
3130 |
+
false
|
3131 |
],
|
3132 |
"model.decoder.layers.29.encoder_attn.q_proj.lora_E": [
|
3133 |
false,
|
|
|
3191 |
false,
|
3192 |
false,
|
3193 |
false,
|
3194 |
+
false,
|
3195 |
true,
|
3196 |
false,
|
3197 |
false,
|
|
|
3210 |
false,
|
3211 |
false,
|
3212 |
false,
|
3213 |
+
false,
|
3214 |
false
|
3215 |
],
|
3216 |
"model.decoder.layers.29.self_attn.q_proj.lora_E": [
|
|
|
3259 |
true,
|
3260 |
true,
|
3261 |
false,
|
|
|
|
|
3262 |
false,
|
3263 |
true,
|
3264 |
+
false,
|
3265 |
true,
|
3266 |
true,
|
3267 |
false,
|
3268 |
+
false,
|
3269 |
true,
|
3270 |
false
|
3271 |
],
|
|
|
3368 |
false
|
3369 |
],
|
3370 |
"model.decoder.layers.3.self_attn.v_proj.lora_E": [
|
3371 |
+
true,
|
3372 |
false,
|
3373 |
false,
|
3374 |
false,
|
|
|
3378 |
false,
|
3379 |
false,
|
3380 |
true,
|
3381 |
+
false,
|
3382 |
false
|
3383 |
],
|
3384 |
"model.decoder.layers.30.encoder_attn.k_proj.lora_E": [
|
|
|
3396 |
false
|
3397 |
],
|
3398 |
"model.decoder.layers.30.encoder_attn.out_proj.lora_E": [
|
|
|
3399 |
true,
|
3400 |
false,
|
3401 |
false,
|
3402 |
false,
|
3403 |
false,
|
3404 |
+
false,
|
3405 |
+
false,
|
3406 |
+
false,
|
3407 |
+
false,
|
3408 |
true,
|
3409 |
+
false,
|
|
|
|
|
|
|
3410 |
false
|
3411 |
],
|
3412 |
"model.decoder.layers.30.encoder_attn.q_proj.lora_E": [
|
|
|
3475 |
false,
|
3476 |
true,
|
3477 |
false,
|
3478 |
+
true,
|
3479 |
true,
|
3480 |
true
|
3481 |
],
|
|
|
3517 |
false,
|
3518 |
false,
|
3519 |
false,
|
3520 |
+
false,
|
3521 |
false,
|
3522 |
true
|
3523 |
],
|
|
|
3584 |
false,
|
3585 |
false,
|
3586 |
false,
|
3587 |
+
false,
|
3588 |
false,
|
3589 |
true,
|
3590 |
true,
|
|
|
3686 |
false,
|
3687 |
false,
|
3688 |
false,
|
3689 |
+
false,
|
3690 |
false
|
3691 |
],
|
3692 |
"model.decoder.layers.4.encoder_attn.q_proj.lora_E": [
|
|
|
3705 |
],
|
3706 |
"model.decoder.layers.4.encoder_attn.v_proj.lora_E": [
|
3707 |
true,
|
3708 |
+
false,
|
3709 |
false,
|
3710 |
false,
|
3711 |
false,
|
|
|
3714 |
true,
|
3715 |
false,
|
3716 |
true,
|
3717 |
+
false,
|
3718 |
+
false
|
3719 |
],
|
3720 |
"model.decoder.layers.4.fc1.lora_E": [
|
3721 |
true,
|
|
|
3853 |
false,
|
3854 |
true,
|
3855 |
true,
|
3856 |
+
false,
|
3857 |
false,
|
3858 |
true
|
3859 |
],
|
|
|
4048 |
false,
|
4049 |
false,
|
4050 |
false,
|
4051 |
+
false,
|
4052 |
false,
|
4053 |
false,
|
4054 |
false
|
|
|
4069 |
],
|
4070 |
"model.decoder.layers.6.self_attn.v_proj.lora_E": [
|
4071 |
false,
|
4072 |
+
true,
|
4073 |
false,
|
4074 |
false,
|
4075 |
false,
|
|
|
4079 |
false,
|
4080 |
true,
|
4081 |
false,
|
4082 |
+
false
|
4083 |
],
|
4084 |
"model.decoder.layers.7.encoder_attn.k_proj.lora_E": [
|
4085 |
false,
|
|
|
4149 |
true,
|
4150 |
true,
|
4151 |
true,
|
4152 |
+
false
|
4153 |
],
|
4154 |
"model.decoder.layers.7.fc2.lora_E": [
|
4155 |
true,
|
|
|
4428 |
true,
|
4429 |
true,
|
4430 |
true,
|
4431 |
+
true,
|
4432 |
true
|
4433 |
],
|
4434 |
"model.decoder.layers.9.fc2.lora_E": [
|
|
|
4443 |
true,
|
4444 |
true,
|
4445 |
true,
|
4446 |
+
false
|
4447 |
],
|
4448 |
"model.decoder.layers.9.self_attn.k_proj.lora_E": [
|
4449 |
false,
|
|
|
4496 |
true,
|
4497 |
false,
|
4498 |
false,
|
|
|
4499 |
false,
|
4500 |
false,
|
4501 |
+
true,
|
4502 |
false
|
4503 |
],
|
4504 |
"model.encoder.layers.0.fc1.lora_E": [
|
|
|
4550 |
true,
|
4551 |
false,
|
4552 |
false,
|
4553 |
+
false,
|
4554 |
+
false,
|
4555 |
false,
|
4556 |
false,
|
4557 |
false,
|
|
|
4778 |
false,
|
4779 |
true,
|
4780 |
false,
|
4781 |
+
true,
|
4782 |
false
|
4783 |
],
|
4784 |
"model.encoder.layers.11.self_attn.k_proj.lora_E": [
|
|
|
4832 |
false,
|
4833 |
false,
|
4834 |
false,
|
4835 |
+
true,
|
4836 |
false,
|
4837 |
false,
|
4838 |
false
|
|
|
4885 |
false,
|
4886 |
false,
|
4887 |
false,
|
4888 |
+
true,
|
4889 |
false,
|
4890 |
false,
|
4891 |
false,
|
|
|
4926 |
false,
|
4927 |
false,
|
4928 |
false,
|
4929 |
+
true,
|
4930 |
true,
|
4931 |
false,
|
4932 |
false,
|
|
|
4943 |
true,
|
4944 |
false,
|
4945 |
false,
|
4946 |
+
true,
|
4947 |
false,
|
4948 |
false,
|
4949 |
true,
|
|
|
4967 |
false,
|
4968 |
false,
|
4969 |
false,
|
4970 |
+
true,
|
4971 |
false,
|
4972 |
false,
|
4973 |
false,
|
|
|
5003 |
false,
|
5004 |
false,
|
5005 |
false,
|
5006 |
+
false
|
5007 |
],
|
5008 |
"model.encoder.layers.14.fc1.lora_E": [
|
5009 |
true,
|
|
|
5016 |
true,
|
5017 |
false,
|
5018 |
false,
|
5019 |
+
false,
|
5020 |
true
|
5021 |
],
|
5022 |
"model.encoder.layers.14.fc2.lora_E": [
|
|
|
5056 |
false,
|
5057 |
false,
|
5058 |
true,
|
5059 |
+
false,
|
5060 |
false,
|
5061 |
true,
|
5062 |
false
|
|
|
5136 |
true,
|
5137 |
false,
|
5138 |
false,
|
5139 |
+
false,
|
5140 |
true,
|
5141 |
false,
|
5142 |
false,
|
5143 |
+
false,
|
5144 |
false,
|
5145 |
false,
|
5146 |
false
|
|
|
5168 |
false,
|
5169 |
false,
|
5170 |
true,
|
5171 |
+
false,
|
5172 |
false,
|
5173 |
false,
|
5174 |
true
|
|
|
5223 |
true,
|
5224 |
false,
|
5225 |
true,
|
|
|
5226 |
false,
|
5227 |
false,
|
5228 |
false,
|
5229 |
+
true,
|
5230 |
true
|
5231 |
],
|
5232 |
"model.encoder.layers.16.self_attn.q_proj.lora_E": [
|
|
|
5307 |
true,
|
5308 |
true,
|
5309 |
false,
|
5310 |
+
false,
|
5311 |
true,
|
5312 |
+
false,
|
|
|
5313 |
true,
|
5314 |
true
|
5315 |
],
|
|
|
5386 |
"model.encoder.layers.18.self_attn.out_proj.lora_E": [
|
5387 |
false,
|
5388 |
false,
|
|
|
5389 |
false,
|
5390 |
false,
|
5391 |
false,
|
5392 |
+
false,
|
5393 |
+
false,
|
5394 |
+
false,
|
5395 |
true,
|
5396 |
false,
|
5397 |
false,
|
|
|
5474 |
false,
|
5475 |
false,
|
5476 |
false,
|
5477 |
+
true,
|
5478 |
false,
|
5479 |
false,
|
5480 |
false,
|
|
|
5806 |
"model.encoder.layers.22.self_attn.out_proj.lora_E": [
|
5807 |
false,
|
5808 |
false,
|
5809 |
+
false,
|
5810 |
false,
|
5811 |
false,
|
5812 |
true,
|
5813 |
+
false,
|
5814 |
+
false,
|
5815 |
true,
|
5816 |
false,
|
5817 |
false,
|
|
|
5892 |
true,
|
5893 |
true,
|
5894 |
true,
|
5895 |
+
false,
|
5896 |
+
false,
|
5897 |
true,
|
5898 |
true,
|
5899 |
false,
|
5900 |
false,
|
5901 |
+
false,
|
5902 |
true
|
5903 |
],
|
5904 |
"model.encoder.layers.23.self_attn.q_proj.lora_E": [
|
|
|
5916 |
false
|
5917 |
],
|
5918 |
"model.encoder.layers.23.self_attn.v_proj.lora_E": [
|
|
|
5919 |
true,
|
5920 |
false,
|
5921 |
false,
|
5922 |
+
false,
|
5923 |
true,
|
5924 |
true,
|
5925 |
true,
|
|
|
6011 |
true,
|
6012 |
true,
|
6013 |
true,
|
6014 |
+
false
|
6015 |
],
|
6016 |
"model.encoder.layers.25.fc1.lora_E": [
|
6017 |
true,
|
|
|
6092 |
true,
|
6093 |
true,
|
6094 |
true,
|
6095 |
+
false,
|
6096 |
true,
|
6097 |
true,
|
6098 |
true
|
|
|
6227 |
true,
|
6228 |
true,
|
6229 |
false,
|
|
|
|
|
6230 |
true,
|
6231 |
+
false,
|
6232 |
true,
|
6233 |
true,
|
6234 |
+
false,
|
6235 |
true,
|
6236 |
false,
|
6237 |
false,
|
|
|
6309 |
],
|
6310 |
"model.encoder.layers.28.self_attn.out_proj.lora_E": [
|
6311 |
false,
|
6312 |
+
false,
|
6313 |
false,
|
6314 |
true,
|
6315 |
false,
|
|
|
6319 |
true,
|
6320 |
true,
|
6321 |
true,
|
6322 |
+
false
|
6323 |
],
|
6324 |
"model.encoder.layers.28.self_attn.q_proj.lora_E": [
|
6325 |
false,
|
|
|
6400 |
true,
|
6401 |
true,
|
6402 |
true,
|
6403 |
+
false,
|
6404 |
true,
|
6405 |
true,
|
6406 |
true
|
|
|
6425 |
true,
|
6426 |
true,
|
6427 |
true,
|
6428 |
+
false,
|
6429 |
true,
|
6430 |
true,
|
6431 |
true,
|
|
|
6564 |
true,
|
6565 |
true,
|
6566 |
true,
|
|
|
6567 |
true,
|
6568 |
true,
|
6569 |
true,
|
6570 |
true,
|
6571 |
true,
|
6572 |
+
false,
|
6573 |
true,
|
6574 |
false
|
6575 |
],
|
|
|
6649 |
false,
|
6650 |
false,
|
6651 |
true,
|
6652 |
+
false,
|
6653 |
true,
|
6654 |
true,
|
6655 |
true,
|
|
|
6677 |
true,
|
6678 |
true,
|
6679 |
false,
|
6680 |
+
false,
|
6681 |
false,
|
6682 |
true,
|
6683 |
true,
|
|
|
6794 |
false,
|
6795 |
false,
|
6796 |
false,
|
6797 |
+
false,
|
6798 |
false
|
6799 |
],
|
6800 |
"model.encoder.layers.5.self_attn.k_proj.lora_E": [
|
|
|
6871 |
false,
|
6872 |
false,
|
6873 |
false,
|
6874 |
+
true,
|
6875 |
false,
|
6876 |
false,
|
6877 |
false,
|
|
|
7040 |
false,
|
7041 |
false,
|
7042 |
false,
|
7043 |
+
false,
|
7044 |
false,
|
7045 |
false,
|
7046 |
false,
|
|
|
7129 |
false,
|
7130 |
false,
|
7131 |
false,
|
7132 |
+
false,
|
7133 |
+
false,
|
7134 |
false
|
7135 |
],
|
7136 |
"model.encoder.layers.9.self_attn.k_proj.lora_E": [
|
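For reference, a minimal sketch (not part of the original commit) of how these masks could be inspected once the adapter config is downloaded. It assumes only what the diff above shows: entries whose keys end in `lora_E` hold flat lists of booleans. The local file path and the helper name are illustrative.

```python
import json

# Illustrative sketch: assumes adapter_config.json from this repository has
# been downloaded to the working directory.
with open("adapter_config.json") as f:
    config = json.load(f)

def iter_lora_e_masks(obj):
    """Yield (key, mask) for every entry whose key ends in 'lora_E' and whose
    value is a list of booleans, wherever it is nested in the config."""
    if isinstance(obj, dict):
        for key, value in obj.items():
            if (
                key.endswith("lora_E")
                and isinstance(value, list)
                and all(isinstance(v, bool) for v in value)
            ):
                yield key, value
            else:
                yield from iter_lora_e_masks(value)

for key, mask in iter_lora_e_masks(config):
    print(f"{key}: {sum(mask)}/{len(mask)} entries still active")
```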
best_checkpoint/adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6d8e7c4342456bf184a1699ceee8b79d4637919e86043c74de3fca982e51df19
+size 38697637
best_checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4f14365a17f23ef7c12f92f6afd4d80275de570ff0a27d16f77b510fcb80dec6
 size 174313245
best_checkpoint/random_states_0.pkl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:46700cb6c6f878730d63a567eac5fae05f37e7908256976b6e8f7afcfd8cb6d8
 size 15691
best_checkpoint/random_states_1.pkl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1ce290220813dc1fc7fecc91c868657014fa05803fefba4b691a6e114ab59d43
 size 15755
best_checkpoint/scheduler.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6bb58f2d06c0b6169e26724c3f525aff558b10a892707ea045a33661020fdf75
 size 563
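Each of the binary files above is stored as a Git LFS pointer recording the spec version, a sha256 `oid`, and the object `size` in bytes. As a rough illustration (not part of the commit), a downloaded object can be checked against its pointer like this; the oid and size are taken from the `best_checkpoint/adapter_model.bin` pointer above, and the local path is illustrative.

```python
import hashlib
import os

def check_lfs_object(object_path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(object_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(object_path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoints do not need to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

print(check_lfs_object(
    "best_checkpoint/adapter_model.bin",
    "6d8e7c4342456bf184a1699ceee8b79d4637919e86043c74de3fca982e51df19",
    38697637,
))
```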