Dracones committed on
Commit
30058fe
1 Parent(s): 6e72a46

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-BF16.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-F32.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ gemma-2-9b-it-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,94 @@
+ ---
+ license: gemma
+ library_name: transformers
+ pipeline_tag: text-generation
+ tags:
+ - conversational
+ - gguf
+ - llamacpp
+ ---
+
+
+
+ # Gemma 2 9b Instruction Tuned - GGUF
+
+ These are GGUF quants of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it).
+
+ Details about the model can be found at the above model page.
+
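+ Individual quants can be fetched without cloning the whole repository, for example with `huggingface-cli`. The command below is only a minimal sketch: the repository id is a placeholder, so substitute the actual `user/repo` shown on this page and pick whichever quant file you want.
+
+ ```bash
+ # Hypothetical single-file download; replace <user>/<repo> with this repository's id.
+ huggingface-cli download <user>/<repo> gemma-2-9b-it-Q4_K_M.gguf --local-dir .
+ ```
+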
+ ## Llamacpp Version
+
+ These quants were made with llamacpp tag b3408.
+
+ If you have problems loading these models, please update your software to use the latest llamacpp version.
+
+
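+ As a quick smoke test, a downloaded quant can be run directly with the llamacpp CLI. This is only a sketch: it assumes a llamacpp build at tag b3408 or newer, the `llama-cli` binary in the current directory, and a local copy of the Q4_K_M file, so adjust the path and flags to your setup.
+
+ ```bash
+ # Minimal load/generate check (assumed file path and flags; adjust as needed).
+ ./llama-cli -m gemma-2-9b-it-Q4_K_M.gguf -p "Why is the sky blue?" -n 128
+ ```
+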
+ ## Perplexity Scoring
+
+ Below are the perplexity scores for the GGUF models. A lower score is better.
+
+ | Quant Level | Perplexity Score | Standard Deviation |
+ |-------------|------------------|--------------------|
+ | F32 | 8.7849 | 0.06498 |
+ | BF16 | 8.7849 | 0.06498 |
+ | Q8_0 | 8.7869 | 0.06500 |
+ | Q6_K | 8.7972 | 0.06510 |
+ | Q5_K_M | 8.7791 | 0.06489 |
+ | Q5_K_S | 8.7899 | 0.06503 |
+ | Q4_K_M | 8.8745 | 0.06575 |
+ | Q4_K_S | 8.9293 | 0.06636 |
+ | Q3_K_L | 9.0210 | 0.06693 |
+ | Q3_K_M | 9.1213 | 0.06784 |
+ | Q3_K_S | 9.1857 | 0.06726 |
+
+
+
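+ Scores like these are typically produced with llamacpp's perplexity tool. The command below is only a sketch: it assumes the b3408-era `llama-perplexity` binary and a raw text evaluation file, and since the exact text used for the table above is not stated here, treat `wiki.test.raw` as a placeholder.
+
+ ```bash
+ # Hypothetical perplexity run for one quant; lower is better.
+ ./llama-perplexity -m gemma-2-9b-it-Q4_K_M.gguf -f wiki.test.raw
+ ```
+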
+ ## Quant Details
+
+ This is the script used for quantization.
+
+ ```bash
+ #!/bin/bash
+
+ # Model to quantize
+ MODEL_NAME="gemma-2-9b-it"
+
+ # Define the output directory
+ outputDir="${MODEL_NAME}-GGUF"
+
+ # Create the output directory if it doesn't exist
+ mkdir -p "${outputDir}"
+
+ # Make the F32 quant
+ f32file="${outputDir}/${MODEL_NAME}-F32.gguf"
+ if [ -f "${f32file}" ]; then
+     echo "Skipping f32 as ${f32file} already exists."
+ else
+     python convert_hf_to_gguf.py "${HOME}/src/models/${MODEL_NAME}" --outfile "${f32file}" --outtype "f32"
+ fi
+
+ # Abort if the F32 conversion didn't work
+ if [ ! -f "${f32file}" ]; then
+     echo "No ${f32file} found."
+     exit 1
+ fi
+
+ # Define the array of quantization strings
+ quants=("Q8_0" "Q6_K" "Q5_K_M" "Q5_K_S" "Q4_K_M" "Q4_K_S" "Q3_K_L" "Q3_K_M" "Q3_K_S")
+
+ # Loop through the quants array
+ for quant in "${quants[@]}"; do
+     outfile="${outputDir}/${MODEL_NAME}-${quant}.gguf"
+
+     # Check if the outfile already exists
+     if [ -f "${outfile}" ]; then
+         echo "Skipping ${quant} as ${outfile} already exists."
+     else
+         # Run the command with the current quant string
+         ./llama-quantize "${f32file}" "${outfile}" "${quant}"
+
+         echo "Processed ${quant} and generated ${outfile}"
+     fi
+ done
+ ```
gemma-2-9b-it-BF16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca744f215443891f4a431d30209d95f8d0dd6d14a4ff3e277d826561198a4a42
+ size 18490680000
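
Each `*.gguf` entry in this commit is a Git LFS pointer rather than the file itself: the `oid` is the SHA-256 of the actual file content and `size` is its length in bytes. A downloaded file can therefore be checked against the pointer; the command below is a sketch assuming a local copy of the BF16 file and the standard `sha256sum` utility.

```bash
# Verify a downloaded quant against the LFS pointer above.
sha256sum gemma-2-9b-it-BF16.gguf
# Expected: ca744f215443891f4a431d30209d95f8d0dd6d14a4ff3e277d826561198a4a42
```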
gemma-2-9b-it-F32.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9cd90b8278ffcdb88a1a7554d628e34949e359ca08b0b94d139be72fcf4ad4c
+ size 36972880576
gemma-2-9b-it-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f78ecceece6cd813f857734076a3c343d08807b34f28e0a869774fef4df6eeff
+ size 5132452544
gemma-2-9b-it-Q3_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a2e9f59f7259b8c264444cd2e6687945dea0f85bc1eea03d05dfe1915e080b3c
+ size 4761780928
gemma-2-9b-it-Q3_K_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00f04bad26222a170ab8274ab60c7465f92eb3a29087b95737a059397383d014
+ size 4337664704
gemma-2-9b-it-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b81832fbf2374bef685a93a2db4559f2ea6d91a4bc2eee69ba0d0d257afca2ac
+ size 5761057472
gemma-2-9b-it-Q4_K_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb42f812cf50f18ba70ce322d3327a532fd472a106a338533f87765d2ed74518
+ size 5478924992
gemma-2-9b-it-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:651036e5957221a51d8c5f0cb4cb135d03158a365438c76e5fb9f5dea09e76e1
+ size 6647366336
gemma-2-9b-it-Q5_K_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ca05d290fed5940110b4c0cfeb089588f678dd10ef6bd0b50765eefa87e6b7f
+ size 6483591872
gemma-2-9b-it-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af29dd73f1ca6eb4426566c00870c4d795a37f064a0efffc7b8f56ad43011e09
+ size 7589069504
gemma-2-9b-it-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:399ac085a1814916bf309e7b622e5a91aadaee839a77c26dcc534d17c7af429a
+ size 9827148480