Upload folder using huggingface_hub
Browse files- .gitattributes +11 -0
- README.md +94 -0
- gemma-2-9b-it-BF16.gguf +3 -0
- gemma-2-9b-it-F32.gguf +3 -0
- gemma-2-9b-it-Q3_K_L.gguf +3 -0
- gemma-2-9b-it-Q3_K_M.gguf +3 -0
- gemma-2-9b-it-Q3_K_S.gguf +3 -0
- gemma-2-9b-it-Q4_K_M.gguf +3 -0
- gemma-2-9b-it-Q4_K_S.gguf +3 -0
- gemma-2-9b-it-Q5_K_M.gguf +3 -0
- gemma-2-9b-it-Q5_K_S.gguf +3 -0
- gemma-2-9b-it-Q6_K.gguf +3 -0
- gemma-2-9b-it-Q8_0.gguf +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
gemma-2-9b-it-BF16.gguf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
gemma-2-9b-it-F32.gguf filter=lfs diff=lfs merge=lfs -text
|
38 |
+
gemma-2-9b-it-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
|
39 |
+
gemma-2-9b-it-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
40 |
+
gemma-2-9b-it-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
41 |
+
gemma-2-9b-it-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
42 |
+
gemma-2-9b-it-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
43 |
+
gemma-2-9b-it-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
44 |
+
gemma-2-9b-it-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
45 |
+
gemma-2-9b-it-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
46 |
+
gemma-2-9b-it-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: gemma
|
3 |
+
library_name: transformers
|
4 |
+
pipeline_tag: text-generation
|
5 |
+
tags:
|
6 |
+
- conversational
|
7 |
+
- gguf
|
8 |
+
- llamacpp
|
9 |
+
---
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
# Gemma 2 9b Instruction Tuned - GGUF
|
14 |
+
|
15 |
+
These are GGUF quants of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it)
|
16 |
+
|
17 |
+
Details about the model can be found at the above model page.
|
18 |
+
|
19 |
+
## Llamacpp Version
|
20 |
+
|
21 |
+
These quants were made with llamacpp tag b3408.
|
22 |
+
|
23 |
+
If you have problems loading these models, please update your software to use the latest llamacpp version.
|
24 |
+
|
25 |
+
|
26 |
+
## Perplexity Scoring
|
27 |
+
|
28 |
+
Below are the perplexity scores for the GGUF models. A lower score is better.
|
29 |
+
|
30 |
+
| Quant Level | Perplexity Score | Standard Deviation |
|
31 |
+
|-------------|------------------|--------------------|
|
32 |
+
| F32 | 8.7849 | 0.06498 |
|
33 |
+
| BF16 | 8.7849 | 0.06498 |
|
34 |
+
| Q8_0 | 8.7869 | 0.06500 |
|
35 |
+
| Q6_K | 8.7972 | 0.06510 |
|
36 |
+
| Q5_K_M | 8.7791 | 0.06489 |
|
37 |
+
| Q5_K_S | 8.7899 | 0.06503 |
|
38 |
+
| Q4_K_M | 8.8745 | 0.06575 |
|
39 |
+
| Q4_K_S | 8.9293 | 0.06636 |
|
40 |
+
| Q3_K_L | 9.0210 | 0.06693 |
|
41 |
+
| Q3_K_M | 9.1213 | 0.06784 |
|
42 |
+
| Q3_K_S | 9.1857 | 0.06726 |
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
## Quant Details
|
47 |
+
|
48 |
+
This is the script used for quantization.
|
49 |
+
|
50 |
+
```bash
|
51 |
+
#!/bin/bash
# Produce a full ladder of GGUF quants for one model:
# first an F32 base via convert_hf_to_gguf.py, then each quant level
# via llama-quantize. Existing output files are skipped, so the script
# is safe to re-run after an interruption.
set -euo pipefail

# Model to quantize; HF-format weights are expected under ${HOME}/src/models/.
MODEL_NAME="gemma-2-9b-it"

# Directory that receives the generated .gguf files.
outputDir="${MODEL_NAME}-GGUF"

# Create the output directory if it doesn't exist.
mkdir -p "${outputDir}"

# Make the F32 quant first; it is the source for every other quant level.
f32file="${outputDir}/${MODEL_NAME}-F32.gguf"
if [ -f "${f32file}" ]; then
  echo "Skipping f32 as ${f32file} already exists."
else
  # NOTE: tilde does NOT expand inside double quotes, so spell out ${HOME}.
  python convert_hf_to_gguf.py "${HOME}/src/models/${MODEL_NAME}" --outfile "${f32file}" --outtype "f32"
fi

# Abort if the F32 conversion did not produce a file.
if [ ! -f "${f32file}" ]; then
  echo "No ${f32file} found." >&2
  exit 1
fi

# Quantization levels to generate from the F32 base.
quants=("Q8_0" "Q6_K" "Q5_K_M" "Q5_K_S" "Q4_K_M" "Q4_K_S" "Q3_K_L" "Q3_K_M" "Q3_K_S")

# Loop through the quants array, skipping any output that already exists.
for quant in "${quants[@]}"; do
  outfile="${outputDir}/${MODEL_NAME}-${quant}.gguf"

  if [ -f "${outfile}" ]; then
    echo "Skipping ${quant} as ${outfile} already exists."
  else
    # Run the quantizer; under 'set -e' a failure aborts before the
    # success message below can mislead.
    ./llama-quantize "${f32file}" "${outfile}" "${quant}"
    echo "Processed ${quant} and generated ${outfile}"
  fi
done
|
94 |
+
```
|
gemma-2-9b-it-BF16.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca744f215443891f4a431d30209d95f8d0dd6d14a4ff3e277d826561198a4a42
|
3 |
+
size 18490680000
|
gemma-2-9b-it-F32.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9cd90b8278ffcdb88a1a7554d628e34949e359ca08b0b94d139be72fcf4ad4c
|
3 |
+
size 36972880576
|
gemma-2-9b-it-Q3_K_L.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f78ecceece6cd813f857734076a3c343d08807b34f28e0a869774fef4df6eeff
|
3 |
+
size 5132452544
|
gemma-2-9b-it-Q3_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2e9f59f7259b8c264444cd2e6687945dea0f85bc1eea03d05dfe1915e080b3c
|
3 |
+
size 4761780928
|
gemma-2-9b-it-Q3_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00f04bad26222a170ab8274ab60c7465f92eb3a29087b95737a059397383d014
|
3 |
+
size 4337664704
|
gemma-2-9b-it-Q4_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b81832fbf2374bef685a93a2db4559f2ea6d91a4bc2eee69ba0d0d257afca2ac
|
3 |
+
size 5761057472
|
gemma-2-9b-it-Q4_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb42f812cf50f18ba70ce322d3327a532fd472a106a338533f87765d2ed74518
|
3 |
+
size 5478924992
|
gemma-2-9b-it-Q5_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:651036e5957221a51d8c5f0cb4cb135d03158a365438c76e5fb9f5dea09e76e1
|
3 |
+
size 6647366336
|
gemma-2-9b-it-Q5_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ca05d290fed5940110b4c0cfeb089588f678dd10ef6bd0b50765eefa87e6b7f
|
3 |
+
size 6483591872
|
gemma-2-9b-it-Q6_K.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af29dd73f1ca6eb4426566c00870c4d795a37f064a0efffc7b8f56ad43011e09
|
3 |
+
size 7589069504
|
gemma-2-9b-it-Q8_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:399ac085a1814916bf309e7b622e5a91aadaee839a77c26dcc534d17c7af429a
|
3 |
+
size 9827148480
|