Add IQ3_K now with ik_llama.cpp PR735
Browse files
- README.md +54 -0
- images/perplexity.png +2 -2
README.md
CHANGED
|
@@ -195,6 +195,60 @@ custom=$(
|
|
| 195 |
|
| 196 |
</details>
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
## `IQ3_KS` 13.633 GiB (3.836 BPW)
|
| 199 |
Final estimate: PPL = 9.7940 +/- 0.07795
|
| 200 |
|
|
|
|
| 195 |
|
| 196 |
</details>
|
| 197 |
|
| 198 |
+
## `IQ3_K` 14.509 GiB (4.082 BPW)
|
| 199 |
+
Final estimate: PPL = 9.6849 +/- 0.0768
|
| 200 |
+
|
| 201 |
+
<details>
|
| 202 |
+
|
| 203 |
+
<summary>👈 Secret Recipe</summary>
|
| 204 |
+
|
| 205 |
+
```bash
|
| 206 |
+
#!/usr/bin/env bash
|
| 207 |
+
|
| 208 |
+
# Per-tensor quantization recipe: one `pattern=type` rule per line, grouped
# into attention tensors, routed-expert FFN tensors, and the non-repeating
# token-embedding / output tensors.
# Lines that start with '#' inside the string are annotations only; the
# `grep -v '^#'` step further down removes them before the rules are joined
# and passed to --custom-q.
# NOTE(review): the layer-specific rules (blk 0, blk 0|47) are listed ahead of
# the catch-all `blk\..*` rules, presumably because the first matching rule
# wins — confirm against the llama-quantize --custom-q implementation.
custom="
|
| 209 |
+
# 48 Repeating Layers [0-47]
|
| 210 |
+
|
| 211 |
+
# Attention
|
| 212 |
+
blk\.(0)\.attn_q.*=q8_0
|
| 213 |
+
blk\.(0)\.attn_k.*=q8_0
|
| 214 |
+
blk\.(0)\.attn_v.*=q8_0
|
| 215 |
+
blk\.(0)\.attn_output.*=q8_0
|
| 216 |
+
|
| 217 |
+
blk\..*\.attn_q.*=iq5_k
|
| 218 |
+
blk\..*\.attn_k.*=iq6_k
|
| 219 |
+
blk\..*\.attn_v.*=iq6_k
|
| 220 |
+
blk\..*\.attn_output.*=iq5_k
|
| 221 |
+
|
| 222 |
+
# Routed Experts
|
| 223 |
+
blk\.(0|47)\.ffn_down_exps\.weight=q8_0
|
| 224 |
+
blk\.(0|47)\.ffn_(gate|up)_exps\.weight=q8_0
|
| 225 |
+
|
| 226 |
+
blk\..*\.ffn_down_exps\.weight=iq4_k
|
| 227 |
+
blk\..*\.ffn_(gate|up)_exps\.weight=iq3_k
|
| 228 |
+
|
| 229 |
+
# Non-Repeating Layers
|
| 230 |
+
token_embd\.weight=iq4_k
|
| 231 |
+
output\.weight=iq6_k
|
| 232 |
+
"
|
| 233 |
+
|
| 234 |
+
# Flatten the multi-line recipe into the single comma-separated rule list that
# is handed to --custom-q below:
#   grep -v '^#'   drops every line that begins with '#'
#   sed -Ez        -z makes GNU sed treat the whole input as one record, so the
#                  newlines themselves can be matched
#     s:\n+:,:g    collapses each run of newlines (blank lines included) to ","
#     s:,$::       trims the comma left by the final newline
#     s:^,::       trims the comma left by the leading newline
custom=$(
|
| 235 |
+
echo "$custom" | grep -v '^#' | \
|
| 236 |
+
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
# Invoke the locally built quantizer with the flattened rule list (--custom-q)
# and the imatrix .dat file (--imatrix), followed by four positional arguments.
# NOTE(review): the positionals look like <input gguf> <output gguf> <type>
# <nthreads> — i.e. the first BF16 shard (00001-of-00002) as input, the
# PR735-IQ3_K file as output, IQ3_K as the base quantization type, and 192 as
# the thread count — confirm against the llama-quantize usage text.
./build/bin/llama-quantize \
|
| 240 |
+
--custom-q "$custom" \
|
| 241 |
+
--imatrix /mnt/raid/models/ubergarm/Qwen3-Coder-30B-A3B-Instruct-GGUF/imatrix-Qwen3-Coder-30B-A3B-Instruct-BF16.dat \
|
| 242 |
+
/mnt/raid/models/ubergarm/Qwen3-Coder-30B-A3B-Instruct-GGUF/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf \
|
| 243 |
+
/mnt/raid/models/ubergarm/Qwen3-Coder-30B-A3B-Instruct-GGUF/Qwen3-Coder-30B-A3B-Instruct-PR735-IQ3_K.gguf \
|
| 244 |
+
IQ3_K \
|
| 245 |
+
192
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
</details>
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
|
| 252 |
## `IQ3_KS` 13.633 GiB (3.836 BPW)
|
| 253 |
Final estimate: PPL = 9.7940 +/- 0.07795
|
| 254 |
|
images/perplexity.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|