diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,110886 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.011741794645786285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.010039303451776505, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.005005646031349897, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.005253964569419622, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.005253830924630165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0020988518372178078, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.011020767502486706, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.009954367764294147, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.005521690472960472, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.004856838379055262, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.005047182086855173, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.005227583926171064, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.004855519160628319, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.002875327132642269, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0021949876099824905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0027878249529749155, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.0019958314951509237, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0017186423065140843, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.001946514705196023, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0016548547428101301, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0018434583907946944, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.001946334494277835, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.001461102394387126, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0016362062888219953, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.011832153424620628, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.010165962390601635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.004964808933436871, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0051947287283837795, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.005194354336708784, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0019248916069045663, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.011482074856758118, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.01006231363862753, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0054503739811480045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.004763060249388218, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.004972229246050119, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.005201249849051237, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.00476030120626092, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0027385856956243515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0019873641431331635, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.002708667190745473, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0017495659412816167, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0014419619692489505, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0016902106581255794, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.001360619906336069, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0016873546410351992, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0016898202011361718, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0012769345194101334, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0013373164692893624, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.11816699802875519, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0705794095993042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.043026261031627655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04900796711444855, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04897104203701019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02144332230091095, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.08008525520563126, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06514305621385574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.05589240789413452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03171461820602417, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03808746114373207, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04325748234987259, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03141753748059273, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.023826517164707184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021642062813043594, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.022186854854226112, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012696029618382454, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01167201902717352, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009476312436163425, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007783724460750818, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.011408926919102669, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009426301345229149, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006540065631270409, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006196490954607725, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12028311938047409, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07688909024000168, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.049087993800640106, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.051279447972774506, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04967961087822914, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.024882830679416656, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.077501080930233, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06934905052185059, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05710895359516144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.034313615411520004, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.036955736577510834, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.039714232087135315, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.033540427684783936, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024887504056096077, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02243557572364807, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02008814550936222, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014506760984659195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013559816405177116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012045644223690033, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0105181485414505, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.011203233152627945, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.011938399635255337, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0081179803237319, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009360939264297485, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.12357538938522339, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.11462093889713287, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.11191654205322266, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.10164861381053925, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.055504366755485535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.05289480462670326, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06199128180742264, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.05718548968434334, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.056318700313568115, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.050355810672044754, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.04806723818182945, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.031493932008743286, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.027437372133135796, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.026743195950984955, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.026588333770632744, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.015859590843319893, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.014426957815885544, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.014352194033563137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.013502456247806549, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.013398331589996815, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.008878719992935658, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.00964638777077198, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.008623575791716576, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007481908891350031, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14754511415958405, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13876834511756897, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13615240156650543, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12370819598436356, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0667877197265625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06418080627918243, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07392510026693344, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.068306103348732, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06749579310417175, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06102219223976135, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05804083123803139, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03727032616734505, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.032368991523981094, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03171287477016449, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0315668098628521, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01858256198465824, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01625785231590271, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.016186891123652458, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015140757896006107, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015044014900922775, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009740359149873257, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009722740389406681, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009478806518018246, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006400903221219778, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.09120667725801468, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.07726530730724335, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.07036212831735611, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.06179375573992729, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.040439482778310776, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.03448771685361862, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.054491449147462845, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.04691503196954727, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.04226250201463699, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0331629142165184, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.032168060541152954, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.02674068510532379, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.022592850029468536, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.019831456243991852, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.019145097583532333, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.013816827908158302, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.011128795333206654, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.010865142568945885, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.009858334437012672, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.009456013329327106, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.008276698179543018, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.008217828348279, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.0074980235658586025, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.006548087578266859, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.02348526567220688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.015485309064388275, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.008750831708312035, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.009292501024901867, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.009031950496137142, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0038970315363258123, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.016170814633369446, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.014632748439908028, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.01096485648304224, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.007115090731531382, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0076471068896353245, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.00817128922790289, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.00697338068857789, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.004539649002254009, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0037572626024484634, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.004106528591364622, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.0026385129895061255, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0023237522691488266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.002322938060387969, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0018606497906148434, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.002214443404227495, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0023016566410660744, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.001347041572444141, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0016925078816711903, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.020593732595443726, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.013863629661500454, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.007531119976192713, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.008020411245524883, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.007829110138118267, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.003140061628073454, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.015074952505528927, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.013272635638713837, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.009568080306053162, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.006414810661226511, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.006928568240255117, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.007522284984588623, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0063193002715706825, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.003917571622878313, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.003113748040050268, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0037849845830351114, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.002254743594676256, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0019203987903892994, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0020176435355097055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0015388790052384138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0020227651111781597, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0020025502890348434, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.001146503142081201, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0014096737140789628, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.14529357850551605, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09451313316822052, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.067014679312706, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06420602649450302, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.05909202620387077, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03348279371857643, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.08820471167564392, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.07976555824279785, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.06891517341136932, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04131586477160454, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04267685115337372, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04507611691951752, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0380999818444252, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02874356508255005, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.026098838075995445, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02251526154577732, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.015257884748280048, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01416554395109415, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01190805621445179, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009914660826325417, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01168680191040039, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.011333325877785683, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0077410368248820305, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007325408048927784, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1662459522485733, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.13470028340816498, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.12172643840312958, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.10008179396390915, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07529421150684357, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0619962140917778, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09569302201271057, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08505596965551376, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.07887450605630875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05597910284996033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.05196079984307289, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.049278758466243744, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.041454095393419266, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.03710709884762764, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.03601789474487305, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0249461829662323, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02075735665857792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0202650036662817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01737888529896736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.016688281670212746, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013950005173683167, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015167281031608582, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012269347906112671, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012087606824934483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1673189401626587, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15775373578071594, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15474483370780945, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13968312740325928, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07757586240768433, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07441867887973785, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08614882826805115, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07926052808761597, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07838060706853867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07028348743915558, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06668445467948914, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0441129207611084, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03843507170677185, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03771228343248367, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03753413259983063, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022263670340180397, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020610133185982704, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020531343296170235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01928330399096012, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.019187545403838158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012574742548167706, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014044292271137238, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01231252308934927, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01119504775851965, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.21316485106945038, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2013973444700241, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19795377552509308, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1791694313287735, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09914609789848328, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09532733261585236, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10958478599786758, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10102514922618866, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10007267445325851, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09008172899484634, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08543556928634644, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05591464042663574, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04860876128077507, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04779680445790291, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04759983718395233, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.028027014806866646, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.025449026376008987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02536061778664589, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023732231929898262, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023618677631020546, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015326297841966152, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01652405597269535, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014997776597738266, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012471204623579979, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.016841884702444077, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.016412295401096344, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.005205800756812096, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.004705402534455061, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.003932596649974585, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0027117524296045303, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.017005477100610733, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.015927348285913467, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.004044478293508291, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.003548787208274007, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0034995474852621555, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0034397810231894255, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.003234193427488208, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.002863155910745263, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0019957800395786762, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0024589255917817354, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.00169284304138273, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.000983205740340054, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0016410433454439044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.0009001073194667697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0016652157064527273, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.0016061807982623577, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.0007361344178207219, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0007773927063681185, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.058512646704912186, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.04489284008741379, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.037526894360780716, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.03353030979633331, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.024945208802819252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.01846156269311905, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.035125188529491425, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03192409127950668, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0275416299700737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.01949249766767025, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.01919780671596527, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.01781105063855648, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.015207292512059212, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.012098835781216621, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01125128474086523, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.008894702419638634, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.006425777450203896, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.006062117405235767, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.005386792588979006, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.004818381741642952, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004640915431082249, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004630370996892452, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0034151743166148663, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0031124150846153498, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06219617649912834, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.04571424424648285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03586192801594734, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.032772064208984375, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02580939792096615, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.017563389614224434, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03831528127193451, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03481806442141533, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.02907426282763481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.019861234351992607, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.019916323944926262, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.019336458295583725, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01653977856040001, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.012593665160238743, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.011457267217338085, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009671948850154877, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0067603993229568005, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006261804141104221, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.005648148246109486, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.004884391091763973, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005034842994064093, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005076664499938488, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0034914424177259207, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003429922740906477, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.17343014478683472, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.14140573143959045, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.12862937152385712, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.11265072971582413, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.07719169557094574, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.06370308995246887, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09750384092330933, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08816266804933548, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08236868679523468, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06118014454841614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.058127470314502716, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04959489777684212, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04218074679374695, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.037192266434431076, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.03589503467082977, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.024810930714011192, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.019210824742913246, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0186618585139513, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01598980650305748, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.015100893564522266, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013026140630245209, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01243518851697445, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.010887143202126026, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007939317263662815, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18359330296516418, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16236406564712524, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15294556319713593, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1321098506450653, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0843072310090065, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0751233696937561, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10332590341567993, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09372502565383911, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08714082837104797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07094352692365646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0660063773393631, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05357726663351059, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04560099542140961, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04146996885538101, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.040434326976537704, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02728724107146263, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02298574335873127, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022531690075993538, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02054455503821373, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019932784140110016, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015554677695035934, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016486918553709984, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014088043011724949, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013036693446338177, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.21562723815441132, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20227456092834473, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1981753259897232, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1782328188419342, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10097559541463852, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09630642086267471, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11205262690782547, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10338417440652847, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1022556871175766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09045232087373734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08524452149868011, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05726420879364014, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04963076114654541, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.048544399440288544, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04830126836895943, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.028662534430623055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.025425100699067116, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02531568519771099, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023366905748844147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023214317858219147, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01542135514318943, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01598057895898819, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01498482283204794, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0114506920799613, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25132977962493896, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23606495559215546, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23152896761894226, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20833617448806763, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11759209632873535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11228420585393906, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13026005029678345, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12027670443058014, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11902822554111481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1054607555270195, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09931720048189163, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06631305813789368, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05745438113808632, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.056264884769916534, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05598717927932739, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.033125363290309906, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028896085917949677, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02877500280737877, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026442162692546844, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02626388892531395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017455480992794037, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01732243411242962, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01695733144879341, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01144368201494217, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.19245359301567078, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1679936647415161, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1586853414773941, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.13813550770282745, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08647328615188599, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07733291387557983, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10284928977489471, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09389212727546692, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.08951376378536224, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07216634601354599, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.06757840514183044, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.052298180758953094, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04493878409266472, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04163632541894913, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.040832310914993286, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.026267798617482185, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.022061293944716454, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.021765878424048424, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.019344249740242958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.01882977783679962, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.014319395646452904, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.014597450383007526, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.013196641579270363, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01055855955928564, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.046682510524988174, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.03837323561310768, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.033674586564302444, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.029562005773186684, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.020561309531331062, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.016574973240494728, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.027976172044873238, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.02542630024254322, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.02205117791891098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.016702834516763687, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.01612362265586853, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.014201113022863865, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.012142330408096313, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.009984758682549, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.009409671649336815, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.007116848602890968, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.005313942674547434, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.005048935767263174, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.004575922153890133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.004196293652057648, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.003769785398617387, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0037728939205408096, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0029525135178118944, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0025904583744704723, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.04761601984500885, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.038031939417123795, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03196808695793152, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.028172649443149567, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.020487520843744278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.015569781884551048, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.029558224603533745, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.026733970269560814, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.022306010127067566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.016473619267344475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.01618824154138565, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01497112400829792, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.012729311361908913, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.009912723675370216, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.009129981510341167, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.007486298214644194, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0052144648507237434, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0048556262627244, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.004435921553522348, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.003911491949111223, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0038797103334218264, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.003755370154976845, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0027816263027489185, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.002407833468168974, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1871909201145172, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.15625454485416412, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.14408047497272491, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.12558771669864655, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08390156179666519, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07138712704181671, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10333544760942459, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09473664313554764, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08878353983163834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06766127794981003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0637902021408081, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05247144773602486, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.045202676206827164, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04031277820467949, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.039077091962099075, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026239601895213127, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02065451629459858, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.020114773884415627, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01736009679734707, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.016485895961523056, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01348425168544054, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013070977292954922, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.011431927792727947, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008064975030720234, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18687382340431213, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16329823434352875, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15464840829372406, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13036559522151947, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08593996614217758, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07647949457168579, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.101897694170475, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09279283881187439, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08858828246593475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07001335918903351, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06333412975072861, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05246499925851822, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.045086588710546494, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04205608367919922, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04132303223013878, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02649807371199131, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023101728409528732, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02276173047721386, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020190376788377762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01973349042236805, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014826303347945213, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016179237514734268, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013710103929042816, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012735947035253048, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22541005909442902, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21125909686088562, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20694558322429657, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18623700737953186, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10582608729600906, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10079916566610336, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11761528998613358, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10849089920520782, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10722179710865021, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0946982279419899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08931519091129303, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.060022447258234024, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05198335275053978, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05079435929656029, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.050520360469818115, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030043229460716248, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026404093950986862, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026280228048563004, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024194208905100822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024021169170737267, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016045447438955307, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01631302572786808, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015561038628220558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011327842250466347, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2602928578853607, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24420250952243805, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23929932713508606, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21560858190059662, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12228655815124512, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11661508679389954, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13581611216068268, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12523283064365387, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12386825680732727, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10957532376050949, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1034211739897728, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06923788040876389, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05991446226835251, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.058590881526470184, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05828196555376053, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03462407365441322, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030129507184028625, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029994232580065727, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027557995170354843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027361296117305756, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018333518877625465, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018129516392946243, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017780551686882973, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012022686190903187, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2166738212108612, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19121675193309784, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18169841170310974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15955784916877747, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09814732521772385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08863293379545212, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11564251780509949, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10569965839385986, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10123441368341446, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08277992904186249, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07779883593320847, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05866767093539238, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05053132772445679, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.047156982123851776, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.046347543597221375, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029447434470057487, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024775201454758644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02447160705924034, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0218142569065094, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02128646895289421, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015901785343885422, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01605824939906597, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01475357636809349, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011345628648996353, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.06369227916002274, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.053080443292856216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.046850915998220444, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.04140567034482956, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.028268851339817047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.023031610995531082, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.03867676481604576, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03486292436718941, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.03018415905535221, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02328941971063614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.022546550258994102, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.019694972783327103, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.016710273921489716, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.013759712688624859, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.012985973618924618, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.009887240827083588, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.007409320678561926, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.007052177097648382, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0064673698507249355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.005974548868834972, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005280279088765383, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005330062936991453, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0041557238437235355, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003790645394474268, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06256874650716782, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05038969963788986, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.043326251208782196, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.038408730179071426, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02699277736246586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02113657258450985, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03834724426269531, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03439107909798622, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.02948739193379879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0219899769872427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02161184325814247, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.019509321078658104, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01643429882824421, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.013115344569087029, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.012201817706227303, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009806922636926174, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.006986511871218681, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006599305663257837, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0060353511944413185, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005442915949970484, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005148075520992279, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0050531188026070595, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0038327767979353666, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0034255986101925373, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.20088176429271698, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17064349353313446, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.15891975164413452, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.13934843242168427, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09074987471103668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07857336848974228, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11020223796367645, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10094150900840759, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09536934643983841, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07427741587162018, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06992053985595703, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05602789297699928, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04827963560819626, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.043609194457530975, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04245833307504654, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028009435161948204, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02248363383114338, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.021990159526467323, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019163915887475014, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01837899535894394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014510203152894974, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014257458969950676, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012540793977677822, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00913552287966013, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18732941150665283, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16537068784236908, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15748582780361176, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13462641835212708, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08591023087501526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07731587439775467, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10288270562887192, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09242438524961472, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08857236802577972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07149405032396317, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06606463342905045, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05294060707092285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04502089321613312, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04208431392908096, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04136638343334198, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.026760369539260864, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02307891845703125, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022769581526517868, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020420093089342117, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019981401041150093, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014958756044507027, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016127435490489006, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013825946487486362, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012669534422457218, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2059173583984375, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19187010824680328, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.18722979724407196, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.16820578277111053, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09659484028816223, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09136222302913666, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10848764330148697, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09993714094161987, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09812604635953903, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08597109466791153, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08100783824920654, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.055311739444732666, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.047924261540174484, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04641028121113777, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04605739191174507, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.027711857110261917, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024101465940475464, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02394005097448826, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02197621949017048, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.021751487627625465, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01476989034563303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014915956184267998, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014154824428260326, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01027509942650795, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.26173463463783264, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24446429312229156, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23907168209552765, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21509960293769836, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1230301558971405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11670384556055069, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13731111586093903, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12669381499290466, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12484925985336304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1097227931022644, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10340260714292526, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06992921233177185, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06062943860888481, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05897001177072525, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.058576617389917374, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03498806059360504, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030290743336081505, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030109703540802002, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027585675939917564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027342194691300392, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018389536067843437, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018216313794255257, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017693035304546356, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01196927111595869, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22654572129249573, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20259083807468414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19395442306995392, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17136698961257935, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10331501811742783, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0945034995675087, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12118279933929443, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1101427748799324, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10625483095645905, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08856787532567978, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08330780267715454, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06165224313735962, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.052809879183769226, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.049741849303245544, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0489971898496151, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03101050667464733, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.026240460574626923, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025971056893467903, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023399893194437027, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022924624383449554, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016819199547171593, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017070937901735306, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015730176120996475, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01226724125444889, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.076760433614254, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06604080647230148, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.06003175675868988, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.052963484078645706, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03454146161675453, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.029452402144670486, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.045218802988529205, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0410783477127552, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.03633767366409302, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02902214787900448, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.027750816196203232, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.022930704057216644, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01963951252400875, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.016742652282118797, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01598765142261982, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.011492704972624779, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008880727924406528, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008523650467395782, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007810171693563461, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.007322680205106735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006099554244428873, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006138879340142012, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0050045582465827465, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004245077725499868, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07165830582380295, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06043315678834915, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05374646559357643, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04727565869688988, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.031727660447359085, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.026180021464824677, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.042877838015556335, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.038997139781713486, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.033717479556798935, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02632269635796547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02540370263159275, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.021706225350499153, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.018562227487564087, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.015300479717552662, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.014447866939008236, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01085126306861639, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007971711456775665, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.007556188851594925, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006905721500515938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0063229575753211975, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005609418731182814, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005467557348310947, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004344412125647068, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0034805622417479753, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22050558030605316, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19394756853580475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18439973890781403, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16203264892101288, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10119429230690002, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09077564626932144, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11887875944375992, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10917330533266068, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1048901304602623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08502421528100967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07959960401058197, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06039318069815636, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05212276428937912, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04849054291844368, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04760981723666191, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030131492763757706, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02467673271894455, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024293608963489532, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021326497197151184, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.020732201635837555, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015469801612198353, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014918150380253792, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013911914080381393, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009069439023733139, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21491265296936035, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19166070222854614, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18333521485328674, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15804681181907654, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09920436888933182, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08992710709571838, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11546070128679276, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10567072033882141, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10178607702255249, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08263899385929108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07636664807796478, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05945824459195137, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05089098960161209, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04793071374297142, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04723275080323219, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02995169907808304, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025373706594109535, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02505517192184925, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022169407457113266, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.021711895242333412, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016355767846107483, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01661076210439205, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01517627015709877, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0120705496519804, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2002905160188675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18686388432979584, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.18242086470127106, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.16392798721790314, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09405160695314407, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0890217274427414, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10546180605888367, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09729954600334167, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09548073261976242, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08373167365789413, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07893620431423187, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05371727794408798, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04658299684524536, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0451231487095356, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.044779106974601746, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0268792062997818, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.023286791518330574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.023129913955926895, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0212104469537735, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020988058298826218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014181707054376602, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014217947609722614, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013581087812781334, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009535029530525208, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2624628245830536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24577389657497406, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24037545919418335, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21649771928787231, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12359641492366791, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11746152490377426, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13784274458885193, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12728570401668549, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12536011636257172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11044570058584213, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10415484011173248, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07024067640304565, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06086306273937225, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05922389402985573, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05883009359240532, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03509986028075218, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.03035305254161358, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030167534947395325, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027666326612234116, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027411269024014473, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018372291699051857, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018155526369810104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01768476516008377, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011785429902374744, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22943828999996185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2080729603767395, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20006226003170013, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17810440063476562, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10532774776220322, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09734442085027695, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12288401275873184, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1119082048535347, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10796496272087097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09166541695594788, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08677222579717636, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06263456493616104, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053611159324645996, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05064240097999573, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04991280660033226, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03148176148533821, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02655009925365448, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026276417076587677, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023892782628536224, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023436354473233223, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017033366486430168, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017033688724040985, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015978913754224777, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012023108080029488, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.0732078030705452, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06263139098882675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05586852878332138, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.04936198517680168, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03282627463340759, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02732730470597744, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0448090024292469, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.040400147438049316, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.034663937985897064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.027543876320123672, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.026583664119243622, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02273893915116787, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01931830868124962, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.01592073030769825, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01502236444503069, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.011407147161662579, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008477956056594849, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.00804590992629528, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007467196322977543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.006886336021125317, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006014620419591665, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006006841082125902, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004709300119429827, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004127282649278641, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06917628645896912, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05788680538535118, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0505794957280159, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04466132074594498, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.030510038137435913, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02457541599869728, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04272822290658951, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03857774659991264, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0325748585164547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.025332005694508553, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02461335062980652, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.021652910858392715, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.018436023965477943, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.014768969267606735, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.013775484636425972, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010817838832736015, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007778686471283436, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.007302714977413416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006770881358534098, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.006115891970694065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005643063690513372, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005514994729310274, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004231423605233431, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0036051918286830187, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.20390307903289795, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17946916818618774, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1696394681930542, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.14925798773765564, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09357950836420059, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08340872079133987, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11230506747961044, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10285434126853943, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09696212410926819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07878995686769485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07398653030395508, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05703515186905861, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04912995174527168, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04490959644317627, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.043831031769514084, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02845638245344162, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.022964471951127052, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022464126348495483, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019943689927458763, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.019228104501962662, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014624555595219135, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014208933338522911, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012858377769589424, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008765519596636295, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.20988738536834717, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1862928569316864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17745262384414673, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15387529134750366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0968068316578865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08707857131958008, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11487749218940735, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10415424406528473, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09974855184555054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08115971088409424, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07526528090238571, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.059080302715301514, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05030640959739685, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04698709398508072, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04618038982152939, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029681619256734848, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025064805522561073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024678196758031845, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022043826058506966, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.021533455699682236, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01608492247760296, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016713332384824753, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014777138829231262, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012343456037342548, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19549311697483063, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18275149166584015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17836213111877441, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.16049541532993317, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09195363521575928, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08707866817712784, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10332425683736801, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09532202035188675, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09335079789161682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08209101110696793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07742772251367569, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05268354341387749, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04567361995577812, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04415399581193924, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04379291832447052, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.026362938806414604, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.0228473749011755, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.022680504247546196, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02086556702852249, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020639628171920776, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01392920408397913, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014034854248166084, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013301092199981213, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009501875378191471, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25947457551956177, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24309492111206055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2376989871263504, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21403247117996216, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12231005728244781, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11616680771112442, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13674813508987427, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12619571387767792, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12408514320850372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10936474800109863, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10313966870307922, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0696212649345398, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06037689372897148, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0586208775639534, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.058210961520671844, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03479650989174843, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030052676796913147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029864633455872536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027409007772803307, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027147380635142326, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018183205276727676, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01800849474966526, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017452018335461617, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011700372211635113, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23373185098171234, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21349339187145233, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20612263679504395, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18440671265125275, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10777640342712402, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10036948323249817, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12498042732477188, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11373060941696167, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11022786796092987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09466427564620972, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08989267796278, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06363210082054138, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05453018844127655, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05185726657509804, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.051216091960668564, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03194302320480347, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027246788144111633, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02700027823448181, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024697493761777878, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02428746037185192, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017176752910017967, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017484866082668304, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016230950132012367, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012446092441678047, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07880662381649017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06967642158269882, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.062412749975919724, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05507039651274681, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03588404133915901, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03034130483865738, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04956411197781563, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04456650838255882, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.037364501506090164, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.030814405530691147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.029836727306246758, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.025260435417294502, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.021418284624814987, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.017455345019698143, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01639847829937935, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.012661227956414223, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.009375957772135735, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008847430348396301, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.008414529263973236, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.007761395536363125, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00669725239276886, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006765533238649368, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00519756181165576, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004749409854412079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07223519682884216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06298884004354477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05491138622164726, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04834117740392685, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.032448846846818924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.026488900184631348, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04625943675637245, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.041975557804107666, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03396400436758995, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0276766549795866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.026935230940580368, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.023492980748414993, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.020094063133001328, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01571584679186344, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.014522737823426723, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011769247241318226, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.008322242647409439, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.00769001105800271, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007395932450890541, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.006608662195503712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006132136564701796, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006016383413225412, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0045061721466481686, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003918894100934267, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21977901458740234, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19671016931533813, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1877019852399826, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16487516462802887, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10169027745723724, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09225621819496155, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12029033899307251, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10952064394950867, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1045893058180809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08646658807992935, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08086275309324265, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.061171676963567734, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05236399918794632, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.048744890838861465, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04785812273621559, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030520040541887283, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.024973874911665916, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024545716121792793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02184772863984108, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02125040628015995, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015734756365418434, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015302803367376328, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014141117222607136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009611159563064575, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21555361151695251, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19254349172115326, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18365967273712158, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15778177976608276, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09986133128404617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09019719064235687, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11958383768796921, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10748075693845749, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10248024016618729, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08341633528470993, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07772227376699448, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06150541454553604, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05228729546070099, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04881389066576958, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04797586426138878, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03109765611588955, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026632893830537796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026248248293995857, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02357676438987255, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.023062733933329582, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017275836318731308, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01844194531440735, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015996824949979782, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0143160130828619, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19322505593299866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18078060448169708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17667603492736816, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15918689966201782, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09117583185434341, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08653688430786133, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10227964073419571, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09415394067764282, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0925116091966629, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08144161105155945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07698295265436172, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.052381932735443115, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04523474723100662, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0438946858048439, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04357544332742691, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02624044194817543, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.022898025810718536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02275143936276436, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020955923944711685, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020758071914315224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014106390066444874, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014304046519100666, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013566575944423676, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010028669610619545, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2630857825279236, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2466888278722763, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24148377776145935, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21759338676929474, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12438344955444336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1182837039232254, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13914287090301514, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1279660165309906, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12612435221672058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11119768023490906, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10520320385694504, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07121138274669647, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0613960400223732, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.059789568185806274, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.059409502893686295, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03565499186515808, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030949365347623825, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030772777274250984, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02829059772193432, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02804873324930668, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01902865245938301, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018960079178214073, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.018363190814852715, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012916077859699726, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2293836772441864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20909780263900757, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20161493122577667, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18029388785362244, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10578370094299316, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09817006438970566, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12285442650318146, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11193789541721344, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10822363942861557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0925711914896965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08794479072093964, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0626479983329773, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053598348051309586, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.050838638097047806, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.050172772258520126, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03141497075557709, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02658884786069393, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02633723057806492, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024004196748137474, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02358148619532585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016873907297849655, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016934188082814217, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01591910608112812, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011876104399561882, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07901906222105026, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06938285380601883, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.06384523957967758, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05582309514284134, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03598543256521225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03125154227018356, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04596109315752983, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04204897955060005, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.03741645812988281, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03032616525888443, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.028704466298222542, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02333989366889, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.020095068961381912, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0173770971596241, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.016685303300619125, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01168033666908741, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.009125243872404099, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008783560246229172, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.00802106037735939, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.007565193343907595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0061378986574709415, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006158444564789534, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.005119639914482832, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004154998809099197, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07236992567777634, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06216508150100708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05627347528934479, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04894547909498215, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03241802379488945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.027379047125577927, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.042494311928749084, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03874550014734268, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03400028869509697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02690804749727249, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.025602107867598534, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.021597303450107574, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.018513383343815804, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.015621152706444263, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.014877562411129475, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010806065052747726, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.008170140907168388, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.007798513397574425, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007090364582836628, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.006585858296602964, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00561788585036993, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0055515700951218605, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004503848031163216, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0036260324995964766, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.20772559940814972, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18339522182941437, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1744174361228943, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1520071029663086, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09536392986774445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08578348159790039, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11361522972583771, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10311543196439743, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09862322360277176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07998374104499817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07457863539457321, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05780022218823433, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04932447150349617, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0457724928855896, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.044886086136102676, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02887214906513691, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023536741733551025, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.023127345368266106, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020426608622074127, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.019838718697428703, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014929503202438354, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014608936384320259, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013298206962645054, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009390185587108135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2182856649160385, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19882026314735413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19135744869709015, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16518808901309967, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10117097198963165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0933479592204094, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11843615770339966, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10756326466798782, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10350584238767624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08647603541612625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07968927919864655, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06075604259967804, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05172858014702797, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04883358255028725, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04813888296484947, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030409671366214752, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025706324726343155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.025380145758390427, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02282727137207985, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022369835525751114, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016236871480941772, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016652293503284454, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015136031433939934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011900407262146473, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19032718241214752, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17799994349479675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17385528981685638, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15634660422801971, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08987744897603989, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08515454828739166, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10100075602531433, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09291812032461166, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09119182080030441, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08012060821056366, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07570932060480118, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05170685425400734, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.044624026864767075, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.043239984661340714, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.042915940284729004, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02588198333978653, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02251785807311535, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0223627220839262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020569462329149246, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020362021401524544, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01381884515285492, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014030435122549534, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013252967037260532, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009768374264240265, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2594975233078003, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24267764389514923, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23730140924453735, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21348105370998383, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12263552844524384, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11627470701932907, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1373102366924286, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12632912397384644, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12444588541984558, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10923604667186737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10318686068058014, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07015369832515717, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06054830178618431, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.058874838054180145, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05847571790218353, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035077545791864395, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030355388298630714, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03017391450703144, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027636898681521416, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02738671936094761, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018471844494342804, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01843644306063652, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017750820145010948, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012333309277892113, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23090864717960358, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.210962176322937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20371325314044952, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18206752836704254, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10660555958747864, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09916719794273376, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12354513257741928, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1125386655330658, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10894085466861725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09347501397132874, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08849885314702988, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06295648962259293, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.054001156240701675, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05130249261856079, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05064569413661957, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.031691260635852814, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.026964765042066574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026715906336903572, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024405159056186676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02399543859064579, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01725481078028679, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017323950305581093, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016329355537891388, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012348088435828686, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09903985261917114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08733012527227402, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08081812411546707, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07073398679494858, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.045206911861896515, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0395502932369709, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05752295255661011, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.052336059510707855, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04692303389310837, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.038263410329818726, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.036249347031116486, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.029287977144122124, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025101598352193832, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.021914629265666008, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.021101171150803566, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014697276055812836, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011649244464933872, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011248016729950905, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010307696647942066, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.00979194138199091, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007855929434299469, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007982689887285233, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006653107702732086, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005635203327983618, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08969487249851227, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07813387364149094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.071453757584095, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.062309473752975464, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.040542639791965485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03483093902468681, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.052798107266426086, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04789105802774429, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04237254336476326, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03402838483452797, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03243284672498703, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026848919689655304, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0229084100574255, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01957526057958603, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01872299611568451, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01343488972634077, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010238118469715118, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.00982034020125866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0089554563164711, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00840039737522602, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006996712647378445, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006909648887813091, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005700059235095978, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004579083062708378, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21477161347866058, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1934019923210144, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1856224536895752, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16176997125148773, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09964131563901901, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09115629643201828, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11546393483877182, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1057109460234642, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10218402743339539, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08445645868778229, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07838701456785202, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.058653995394706726, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05044560134410858, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04769767075777054, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.047025471925735474, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029262864962220192, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.024225890636444092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.023909296840429306, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02108108624815941, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02064068429172039, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014999822713434696, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014457409270107746, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013787873089313507, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008791189640760422, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.222139373421669, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20041242241859436, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19242559373378754, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16715459525585175, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10318978130817413, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0945461317896843, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12011855095624924, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10944224148988724, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10559368878602982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0867169201374054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08171387761831284, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.061679907143116, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05257198587059975, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04967071861028671, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.048960693180561066, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0308346189558506, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025900064036250114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02556302212178707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022608565166592598, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02215627022087574, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01628408394753933, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016406521201133728, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015162719413638115, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011327119544148445, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1879827231168747, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17512007057666779, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17075735330581665, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15337510406970978, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08868216723203659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08377111703157425, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09998631477355957, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09191922098398209, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09013644605875015, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07876323908567429, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07443409413099289, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.051304660737514496, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04427354782819748, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04283067584037781, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04248950257897377, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025728853419423103, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.022567423060536385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02241402305662632, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020622624084353447, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020405367016792297, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013903594575822353, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014438451267778873, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01333022303879261, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010474138893187046, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25230851769447327, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23508931696414948, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22953616082668304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2060736119747162, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11902011185884476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11256081610918045, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13371466100215912, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12290626764297485, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12089482694864273, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10559781640768051, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09960693120956421, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06851654499769211, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.059009797871112823, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05724792927503586, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05683397129178047, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03430365398526192, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.029746999964118004, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029559681192040443, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02705376408994198, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026794178411364555, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018353981897234917, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018428685143589973, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017627134919166565, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012730341404676437, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23652419447898865, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2160588800907135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2084459811449051, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18605561554431915, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10932203382253647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10161087661981583, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12689577043056488, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11569633334875107, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1117720901966095, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09570048004388809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09030140191316605, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06448375433683395, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05541377142071724, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0525379553437233, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0518522746860981, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03232936933636665, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027461206540465355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02719302661716938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024783290922641754, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02435268461704254, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017326325178146362, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017442209646105766, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016338370740413666, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012177363969385624, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.08912591636180878, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07898996770381927, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0735623836517334, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.06379702687263489, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04086275398731232, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03604373335838318, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05099039524793625, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.046587612479925156, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04218083247542381, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03438074514269829, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.032264593988657, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.025923650711774826, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02225206233561039, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.019706346094608307, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01906324364244938, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.012963177636265755, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01030398067086935, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.009972660802304745, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009022011421620846, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.008602814748883247, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006813463754951954, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0068078977055847645, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00583283044397831, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004587721545249224, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.083711639046669, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.073410265147686, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06805431842803955, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.058692771941423416, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03797747939825058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.033234015107154846, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04804372787475586, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.043521948158741, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.039376839995384216, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03166049346327782, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.029805077239871025, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024350672960281372, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.020789312198758125, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.018264401704072952, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.017624981701374054, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012175645679235458, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009455900639295578, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009137127548456192, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008195874281227589, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007763256784528494, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006305788643658161, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006166038569062948, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0052940500900149345, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003970414865761995, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2038017362356186, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18172447383403778, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17386606335639954, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15030430257320404, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09432277083396912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08563150465488434, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11082042753696442, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10050903260707855, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09692242741584778, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07898677885532379, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07320339977741241, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05648002400994301, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04800622910261154, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04519510269165039, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.044531796127557755, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028150830417871475, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023115791380405426, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022807296365499496, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01998412236571312, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.019523775205016136, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014526557177305222, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014056566171348095, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013192480430006981, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008852271363139153, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21345412731170654, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19303381443023682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18519262969493866, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16275477409362793, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09861073642969131, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09038282185792923, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11621671915054321, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1055702418088913, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10113999247550964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08425598591566086, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07972356677055359, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05985303223133087, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.050861433148384094, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04769504815340042, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04694272577762604, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030029211193323135, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025328056886792183, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024973629042506218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022552691400051117, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022068697959184647, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01632869802415371, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016716884449124336, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015145162120461464, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01220472902059555, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1866779774427414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1728537529706955, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1680736094713211, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15043902397155762, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08783125877380371, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08250919729471207, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09981025010347366, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09147524833679199, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08942000567913055, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07749740034341812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07309336960315704, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.051178451627492905, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.044033851474523544, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04237255081534386, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04198702424764633, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025693384930491447, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02230061963200569, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02212711237370968, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020275818184018135, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020032251253724098, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01391923613846302, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01426148135215044, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013257819227874279, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010266564786434174, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24409489333629608, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22626565396785736, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2203616052865982, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19702081382274628, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1148788332939148, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10807573795318604, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.129775732755661, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11919963359832764, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1169043481349945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10127314180135727, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09536992013454437, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0664035826921463, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.057155121117830276, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.055195726454257965, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.054736923426389694, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03322768956422806, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02859065867960453, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028382709249854088, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025854989886283875, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025558117777109146, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01761874184012413, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017623113468289375, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01679486781358719, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011990965344011784, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24274778366088867, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22194969654083252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21436916291713715, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.19107156991958618, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11261828243732452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10482216626405716, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13062812387943268, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11871019005775452, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11508777737617493, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09855884313583374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09305425733327866, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06678465753793716, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05699329823255539, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05423985794186592, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05357035622000694, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03354250639677048, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.028492914512753487, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.028239235281944275, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02575288526713848, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02533084526658058, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018100732937455177, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018276633694767952, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017115892842411995, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01300876960158348, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1011391133069992, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08931607753038406, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0831487625837326, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07202843576669693, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.046547502279281616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04093460366129875, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05793017894029617, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.052830085158348083, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04808126017451286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03892936185002327, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03652762994170189, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.029481273144483566, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025340000167489052, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.022526221349835396, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.021807970479130745, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014772297814488411, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011908501386642456, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011563715524971485, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01043764315545559, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009986242279410362, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007823549211025238, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008019519038498402, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006735522765666246, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005645579658448696, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.0885789543390274, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07778243720531464, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07141020148992538, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06161969527602196, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.040347661823034286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.034932009875774384, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05189761146903038, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04707329720258713, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0417720265686512, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.033671747893095016, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.031767334789037704, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026361385360360146, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02255384996533394, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01947939395904541, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018706228584051132, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013203071430325508, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010219820775091648, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009826666675508022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008910535834729671, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00840371660888195, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006938913371413946, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006889223121106625, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005782357417047024, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0046500409953296185, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.214242622256279, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.188893660902977, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17965836822986603, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15436692535877228, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09854670614004135, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08850600570440292, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11761119961738586, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10585208237171173, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10175685584545135, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08157539367675781, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07554613053798676, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.059934101998806, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.050614871084690094, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.047263022512197495, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04645392671227455, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030007993802428246, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02423662133514881, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02388072945177555, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02078620344400406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.020230446010828018, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015507896430790424, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014891678467392921, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013791595585644245, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00947708822786808, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22006605565547943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1989268958568573, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18526335060596466, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16155965626239777, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10246161371469498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09092742949724197, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1322190761566162, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11865639686584473, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10526015609502792, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08798951655626297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08360637724399567, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06837249547243118, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05742136016488075, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.049991559237241745, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04810325801372528, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03434915840625763, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026971016079187393, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026010511443018913, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024224188178777695, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.023041680455207825, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018644701689481735, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018954003229737282, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015976525843143463, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013899309560656548, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18762587010860443, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17334935069084167, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1682087779045105, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15040963888168335, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08836060762405396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08277228474617004, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10102607309818268, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0924433246254921, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0900789201259613, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07772589474916458, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07339516282081604, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05192876234650612, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.044614747166633606, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04278313368558884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04235227778553963, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02610187977552414, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.022765306755900383, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.022574566304683685, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020705122500658035, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020440977066755295, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014305619522929192, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01490319799631834, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013578786514699459, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011060534976422787, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2441127747297287, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2258056104183197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21962858736515045, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19617100059986115, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11480823159217834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10778472572565079, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13034389913082123, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11947667598724365, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11695036292076111, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10098069161176682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09503049403429031, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06650280952453613, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05727018415927887, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05515114963054657, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05464975908398628, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03328896686434746, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028512246906757355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028284605592489243, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025714855641126633, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025398794561624527, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017568763345479965, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017521586269140244, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01667466014623642, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011793214827775955, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24364304542541504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22267310321331024, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2150244265794754, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.19135615229606628, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11319371312856674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10530994832515717, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13150078058242798, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11935310810804367, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11573845148086548, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09895909577608109, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09338823705911636, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06717728078365326, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05738627165555954, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05457829684019089, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05389070883393288, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03377239778637886, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.028696531429886818, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02844412624835968, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02592305652797222, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.025490907952189445, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018150031566619873, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01845051907002926, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017128167673945427, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013182811439037323, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10802026093006134, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0947747677564621, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08724828064441681, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07566020637750626, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.049522168934345245, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04296494275331497, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06427653133869171, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.057464513927698135, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05137994512915611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04139375314116478, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03943315148353577, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.032896220684051514, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027684876695275307, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024036962538957596, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02311583235859871, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016579605638980865, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01284823939204216, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012399221770465374, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011300019919872284, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010713274590671062, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008915555663406849, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00890460703521967, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007459564600139856, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006373201962560415, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09597031772136688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0836804211139679, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07665026932954788, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06623078137636185, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.043617770075798035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.037564922124147415, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05680614709854126, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0509476400911808, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04531138762831688, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03626319393515587, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.034341223537921906, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0289266686886549, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024450913071632385, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021123886108398438, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02024480700492859, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014521907083690166, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011169794015586376, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01074148528277874, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009751361794769764, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009182263165712357, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007677237968891859, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007642668206244707, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0063398173078894615, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005291965324431658, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21748143434524536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19344308972358704, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18425209820270538, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15977472066879272, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10081244260072708, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09107083827257156, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12057974189519882, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10850886255502701, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10393070429563522, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08439339697360992, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07853758335113525, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06145000830292702, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05186876654624939, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04837827384471893, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04753349721431732, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03070962056517601, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.024755975231528282, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024361051619052887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021392524242401123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02080000750720501, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01587699167430401, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015162174589931965, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014202128164470196, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009509454481303692, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2371080219745636, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21270175278186798, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.2037079632282257, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17852818965911865, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10995236784219742, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10005368292331696, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12974123656749725, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11740681529045105, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11310902237892151, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09363733977079391, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08791189640760422, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06691215932369232, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.056747451424598694, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05335468426346779, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05252694711089134, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03360424190759659, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02831423282623291, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.027959197759628296, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.025128765031695366, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024611184373497963, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01822556182742119, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018645863980054855, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01690809056162834, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0136393578723073, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19269320368766785, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1775253564119339, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17225509881973267, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15399420261383057, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09089042246341705, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08499694615602493, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10408348590135574, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09487688541412354, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09266693890094757, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0796913132071495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07537446916103363, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.053597692400217056, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04594782739877701, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04415236786007881, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04372125118970871, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02703186124563217, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.023716796189546585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0235311109572649, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021584350615739822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.021327055990695953, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015110960230231285, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015801023691892624, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01441844180226326, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012035581283271313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2527051568031311, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23310193419456482, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22665202617645264, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20225757360458374, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11906326562166214, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11159536242485046, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13472965359687805, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12367593497037888, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12124497443437576, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10428325086832047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09808798134326935, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06907355040311813, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.059401530772447586, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05732116848230362, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.056829266250133514, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03457760065793991, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.029873494058847427, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029650753363966942, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02693639136850834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026620987802743912, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018478766083717346, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01869618333876133, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017616938799619675, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013032704591751099, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2478085309267044, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2254660427570343, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2164190709590912, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.19247905910015106, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11486822366714478, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10599596053361893, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1354227513074875, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1229294165968895, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11760507524013519, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10003544390201569, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09455396980047226, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06904910504817963, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0590537004172802, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05544605478644371, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05456123501062393, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.034773532301187515, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.029292194172739983, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02893383614718914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.026423070579767227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02586330473423004, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01884981244802475, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.019129587337374687, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017564313486218452, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013759964145720005, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10881142318248749, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09691264480352402, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09120594710111618, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07918515801429749, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05029285326600075, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04490404203534126, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06136861443519592, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05592791363596916, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.051776088774204254, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04228397086262703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03960154950618744, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.031230127438902855, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026747163385152817, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02419864572584629, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023568708449602127, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015619100071489811, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012588009238243103, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012278878130018711, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010997938923537731, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010587252676486969, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008187728002667427, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008132574148476124, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007166983559727669, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005438397638499737, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09433106333017349, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08392929285764694, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07817342132329941, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06792347133159637, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04335778206586838, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03828193619847298, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.054191261529922485, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04929201304912567, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.044640202075242996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.036478206515312195, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03435598686337471, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027513016015291214, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023567382246255875, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020895106717944145, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020228412002325058, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013752012513577938, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.01086394302546978, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010517262853682041, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00949972402304411, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009049046784639359, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007196436636149883, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007109171710908413, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006164770573377609, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00468017254024744, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22808997333049774, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2030225545167923, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19422641396522522, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16794708371162415, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10554256290197372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09569108486175537, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12441172450780869, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11236803233623505, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10857350379228592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0881747305393219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08178321272134781, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06350340694189072, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.053680166602134705, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.050568509846925735, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04981258139014244, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03170128911733627, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02573583461344242, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.025378216058015823, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022152520716190338, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021639946848154068, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016255145892500877, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015454443171620369, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014620653353631496, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009458114393055439, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24713556468486786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.22511450946331024, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21687883138656616, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.19255892932415009, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11486488580703735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.1061083972454071, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13377822935581207, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12189411371946335, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11744311451911926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09916260838508606, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.09478164464235306, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06870517879724503, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05875261873006821, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05557693913578987, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05482573062181473, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03445500135421753, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.029433226212859154, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02906249463558197, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.026362532749772072, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02587183751165867, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01854788325726986, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019243309274315834, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.017350565642118454, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014020062983036041, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1975872814655304, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18209712207317352, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1766018569469452, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1579943746328354, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09323626011610031, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08716552704572678, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10658429563045502, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09753978997468948, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09507889300584793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08185803145170212, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07725714147090912, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05480913072824478, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0471322275698185, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04519565403461456, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04472564905881882, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02753477171063423, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02407725900411606, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.023867856711149216, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021874597296118736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.021594734862446785, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015020711347460747, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015811137855052948, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014242463745176792, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011781350709497929, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25779736042022705, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2380070686340332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2313624769449234, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20665813982486725, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1215183362364769, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11394200474023819, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13776566088199615, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1263505071401596, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1238078773021698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10658524930477142, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1002579927444458, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07063143700361252, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0606611929833889, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0584755502641201, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05794619023799896, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0353793203830719, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.0303737074136734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03014199435710907, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027377821505069733, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02705102600157261, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01891842670738697, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018858565017580986, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01800217106938362, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01296945195645094, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2561430037021637, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.23183763027191162, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2228560596704483, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.19774501025676727, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11871831119060516, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10942226648330688, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1388198286294937, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12605682015419006, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.12176082283258438, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.10262760519981384, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09691419452428818, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.07101627439260483, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.06049242988228798, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05716477707028389, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0563490130007267, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03571110963821411, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02993941307067871, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02964082546532154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02678993158042431, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.026272812858223915, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0192415788769722, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.019153621047735214, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.018050188198685646, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01345626637339592, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11225896328687668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1002998873591423, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0936051681637764, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08170218020677567, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.051913194358348846, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04603441059589386, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06495679914951324, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05903549864888191, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.053479257971048355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04402691125869751, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04155769571661949, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.033038970082998276, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02831410989165306, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025093916803598404, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02428639866411686, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016549749299883842, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013222415931522846, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012811808846890926, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011673858389258385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01115479227155447, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008724142797291279, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008849424310028553, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007477320730686188, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006124233826994896, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09390267729759216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0842738151550293, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07707849144935608, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06744582951068878, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04313254728913307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03754318505525589, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05679270252585411, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05155019834637642, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04443419352173805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03690416365861893, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03519903123378754, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02878793701529503, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024630172178149223, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020872723311185837, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01989344134926796, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014409482479095459, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010947119444608688, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010437616147100925, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009717462584376335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009087439626455307, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00755153875797987, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00746780913323164, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006198291666805744, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0049742297269403934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2389557957649231, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21358664333820343, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2041861116886139, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17707566916942596, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11082515120506287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10076909512281418, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13230857253074646, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.118557870388031, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11404518783092499, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09324532747268677, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08677227795124054, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06766105443239212, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.056762516498565674, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0531858429312706, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05234584957361221, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03383154422044754, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027300182729959488, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.026903269812464714, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023709923028945923, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023145196959376335, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01747971400618553, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01677902415394783, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015615538693964481, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.01076575368642807, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.25732484459877014, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.22609715163707733, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21397314965724945, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.18599843978881836, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11939609795808792, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10614411532878876, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.14319981634616852, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12957561016082764, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.12313006818294525, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.098194919526577, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.09355485439300537, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.07352732867002487, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.06254897266626358, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.057868365198373795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.056730709969997406, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03692504018545151, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.030707640573382378, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.030207255855202675, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.026602728292346, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02587253972887993, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.019856000319123268, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.020368337631225586, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01811973564326763, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014756552875041962, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18882425129413605, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1745489239692688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1690705567598343, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15165984630584717, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08925388753414154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08346585184335709, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10272053629159927, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09393986314535141, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09102166444063187, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07876458019018173, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07450482249259949, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05285116285085678, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04546862468123436, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04336123540997505, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04286126419901848, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.026572588831186295, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.023220952600240707, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.022999990731477737, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021207019686698914, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.020911602303385735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014556524343788624, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0154486745595932, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013737075962126255, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011652197688817978, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25494253635406494, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23626753687858582, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22974850237369537, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20585662126541138, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12031705677509308, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11308344453573227, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13666419684886932, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12534843385219574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12255731970071793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10614804923534393, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1001206561923027, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06999430805444717, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06021546944975853, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05795147642493248, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05741601437330246, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03506387025117874, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030171850696206093, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029926082119345665, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02733440138399601, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026987377554178238, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018703056499361992, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01884729415178299, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01775662787258625, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013047164306044579, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24717804789543152, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.223474383354187, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21440699696540833, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.19040706753730774, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11453927308320999, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10530312359333038, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.135431170463562, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12220698595046997, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11748228967189789, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09913719445466995, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0937461107969284, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06933292001485825, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05904136598110199, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05545918643474579, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.054558563977479935, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03520862013101578, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02955385483801365, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02920953929424286, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.026617029681801796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.026062631979584694, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.019243070855736732, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01962866447865963, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017928317189216614, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.014488442800939083, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12340318411588669, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11090809851884842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10401201248168945, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09132064133882523, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05719150975346565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.05113140866160393, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07155104726552963, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06458400189876556, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.058793842792510986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04892263561487198, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04636054486036301, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03651602193713188, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0310114286839962, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02768784947693348, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.026855584233999252, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018409352749586105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014656194485723972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.014235884882509708, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013051868416368961, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012514768168330193, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009807877242565155, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009855780750513077, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008461160585284233, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0069439709186553955, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09817249327898026, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08830468356609344, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08036819100379944, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07074751704931259, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04505441337823868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.038955871015787125, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06037331745028496, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05445975437760353, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04638946056365967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03891424462199211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.037435296922922134, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03076125681400299, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.026116693392395973, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021786794066429138, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02064167894423008, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015426188707351685, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011444068513810635, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010850034654140472, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010235129855573177, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009514125995337963, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008087251335382462, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00789392739534378, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006480598356574774, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005250188056379557, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2587392032146454, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.23626554012298584, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.22776314616203308, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.20164228975772858, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1214473694562912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.11231128871440887, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1425018608570099, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12894952297210693, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1242738664150238, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10518816858530045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09896563738584518, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07284240424633026, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06171976402401924, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.058306049555540085, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05747102200984955, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.036420781165361404, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.029781367629766464, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.029404668137431145, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.026412764564156532, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0258793868124485, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018745223060250282, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017980987206101418, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.017137009650468826, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011254237033426762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24327176809310913, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21873775124549866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.2102481722831726, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1791900396347046, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11256411671638489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10321638733148575, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1310560554265976, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1189892441034317, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11518432199954987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0934123694896698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08617320656776428, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06724382936954498, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.057112593203783035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.054160308092832565, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05346148833632469, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03366730362176895, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028294270858168602, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.027970923110842705, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024503299966454506, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024037953466176987, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017948970198631287, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017960937693715096, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016791589558124542, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012511461973190308, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18555781245231628, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17217890918254852, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16711615025997162, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15013892948627472, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08770134299993515, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08240929245948792, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10037817060947418, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09201635420322418, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08936071395874023, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07785115391016006, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07361830025911331, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05169632285833359, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0445890836417675, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04266569763422012, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04219992831349373, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.026017149910330772, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02292824536561966, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.022720882669091225, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021042246371507645, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02076544798910618, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014346573501825333, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015323471277952194, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013607888482511044, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011651933193206787, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25895288586616516, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24104037880897522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23490247130393982, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21097517013549805, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12222163379192352, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11530812084674835, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13796661794185638, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12688936293125153, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12427273392677307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10841819643974304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10214968025684357, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0704149380326271, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06081276386976242, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05869996175169945, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.058200348168611526, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03521471470594406, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.03024481050670147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030017154291272163, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02742655761539936, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027106544002890587, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01848081685602665, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018407583236694336, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01759568601846695, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012209041975438595, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24776288866996765, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2241288721561432, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21513307094573975, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.19100354611873627, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11431054770946503, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10518988221883774, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13410396873950958, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12198246270418167, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11728225648403168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0989316925406456, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0933321863412857, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06830127537250519, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.058536652475595474, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05503895878791809, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05419883131980896, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03436346724629402, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.028876952826976776, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02854534424841404, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0258621983230114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02532276138663292, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01854855753481388, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018572017550468445, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017326051369309425, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013082216493785381, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12117373198270798, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1097666472196579, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10387485474348068, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0914585143327713, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05623985081911087, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.05085379257798195, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06841614842414856, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06239187344908714, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05757743492722511, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.048376791179180145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0456484816968441, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03477995842695236, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029872452840209007, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02708306536078453, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.026388339698314667, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01742585189640522, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014087465591728687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013730810955166817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012516917660832405, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012070307508111, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00914172176271677, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009091540239751339, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008043969050049782, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006089754868298769, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10113202035427094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09196561574935913, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08482304215431213, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07485748827457428, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.046527039259672165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04104984924197197, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06023267284035683, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05505061894655228, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.047707878053188324, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.040477972477674484, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0385737419128418, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.030424442142248154, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02622942440211773, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022410763427615166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021428154781460762, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015227402560412884, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011635176837444305, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.011099357157945633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010426620952785015, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00978400930762291, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007940745912492275, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007761667482554913, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006539377849549055, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004971791990101337, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.26382023096084595, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2425999790430069, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.23481543362140656, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.2087438851594925, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.12405756860971451, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.11559958010911942, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1445237249135971, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1304958313703537, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.12669040262699127, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10836002975702286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.10192793607711792, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07376445829868317, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.062437962740659714, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.059447385370731354, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05874685198068619, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03690478578209877, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.03024117834866047, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.029912130907177925, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.026966921985149384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02651110291481018, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018939929082989693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018010461702942848, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01741616055369377, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011072293855249882, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24945662915706635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.22735999524593353, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21970723569393158, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1942332535982132, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11539773643016815, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.1069394052028656, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13320833444595337, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12170196324586868, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11813250929117203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.10012144595384598, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.09402371197938919, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06859096884727478, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05877256020903587, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05582445487380028, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.055104684084653854, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.034675415605306625, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02966218627989292, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02933366410434246, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.026695281267166138, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02625383250415325, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01904507912695408, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019455954432487488, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01791326142847538, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014341413974761963, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17468243837356567, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16238954663276672, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1575310230255127, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14155779778957367, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08252925425767899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0775456354022026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09486735612154007, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0869235247373581, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.084075428545475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07333676517009735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06946614384651184, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04871262237429619, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04201555997133255, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.040051523596048355, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03957917168736458, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02448265813291073, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.021402383223176003, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021188857033848763, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019638679921627045, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.019353238865733147, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01335823256522417, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014181079342961311, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012606970965862274, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01061955001205206, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25358664989471436, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23656059801578522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23056167364120483, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20725460350513458, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11964811384677887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11304832994937897, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.135221928358078, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12438242137432098, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12160173058509827, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1063607931137085, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10037439316511154, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06903176009654999, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.059592366218566895, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.057456519454717636, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05695578083395958, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03452948480844498, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02958722785115242, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02935158461332321, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026884742081165314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026561763137578964, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018147103488445282, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017999209463596344, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017262781038880348, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011906333267688751, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24023142457008362, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21653813123703003, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20714887976646423, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18424925208091736, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.1105852797627449, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10120674222707748, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13038592040538788, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11883065849542618, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1136532723903656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09552481770515442, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0902230441570282, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0665457621216774, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05704905465245247, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0532672181725502, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05233589932322502, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.033514104783535004, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02795703336596489, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02758672647178173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025015216320753098, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02442222833633423, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01806078478693962, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01807328686118126, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.0167147908359766, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012701083905994892, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1029699295759201, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09357140213251114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08771160989999771, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07790260761976242, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.047644853591918945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.042784400284290314, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0598105825483799, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05441923812031746, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04894326999783516, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04152693599462509, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03958472982048988, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.030435647815465927, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026057405397295952, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.022993940860033035, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02222377061843872, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015229472890496254, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01206556148827076, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011669710278511047, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010860200971364975, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010371244512498379, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00799494981765747, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008032074198126793, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006822109688073397, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005461282562464476, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.0903051421046257, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08164382725954056, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07463133335113525, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06642923504114151, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.041224777698516846, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03610127046704292, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05470705032348633, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04976634681224823, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.042568910866975784, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0360533781349659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03463127091526985, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02760220505297184, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023647017776966095, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.019897345453500748, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01891336962580681, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013803203590214252, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010413493029773235, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009899654425680637, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009389976039528847, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008759312331676483, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007219942752271891, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0070958868600428104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005838138051331043, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0046401419676840305, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22311358153820038, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20048461854457855, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1896357387304306, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16872870922088623, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10256170481443405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09252949059009552, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1275120973587036, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11424155533313751, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10621139407157898, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08893860876560211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08466654270887375, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06501183658838272, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.054541125893592834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04922669380903244, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04789706692099571, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.032465290278196335, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025299200788140297, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024665217846632004, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02252093330025673, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02165556699037552, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016687672585248947, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015949483960866928, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01426458079367876, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010048712603747845, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23885959386825562, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21232490241527557, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20244595408439636, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17544089257717133, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11035851389169693, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0995846763253212, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13051554560661316, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11876488476991653, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11369111388921738, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09273118525743484, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08591845631599426, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06714818626642227, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05736654996871948, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.053517453372478485, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05259403586387634, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03370312601327896, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028457412496209145, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.028041483834385872, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02504468895494938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024463266134262085, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018199607729911804, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018887240439653397, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016731034964323044, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013853438198566437, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17957505583763123, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16666966676712036, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16169588267803192, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14561976492404938, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08465076982975006, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07950235158205032, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09710264950990677, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08912831544876099, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08628664910793304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07532721757888794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07126261293888092, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.049787454307079315, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04301896318793297, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04100136086344719, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.040522292256355286, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025034544989466667, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.021798642352223396, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021580463275313377, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019994569942355156, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01969785988330841, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013633686117827892, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014290965162217617, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012861364521086216, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010551122948527336, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25828641653060913, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24107258021831512, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23502562940120697, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21163250505924225, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1219160184264183, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11526411026716232, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1378909796476364, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12668776512145996, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12392836809158325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10864066332578659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10270151495933533, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07056942582130432, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06082412600517273, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05867255479097366, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05816342309117317, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035469818860292435, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030512068420648575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030277051031589508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027848968282341957, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027528513222932816, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.0190311037003994, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01896912045776844, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.018159352242946625, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013087915256619453, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24968314170837402, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.22388415038585663, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.21362879872322083, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1891459971666336, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.1149328425526619, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10463012754917145, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13648641109466553, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12381653487682343, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11838215589523315, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0986144095659256, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09301870316267014, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06964867562055588, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05944179370999336, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05541861802339554, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.054434794932603836, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.035054270178079605, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02916908450424671, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.028792981058359146, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025991356000304222, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02535783313214779, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01897094026207924, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018980219960212708, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017516857013106346, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013459729962050915, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1170780211687088, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10697893053293228, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1007404699921608, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08945430815219879, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05439586937427521, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04918236657977104, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06813444942235947, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.061633575707674026, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05577356368303299, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04759873449802399, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04531809687614441, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034804269671440125, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02964143641293049, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026351990178227425, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02553241141140461, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017485370859503746, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013985522091388702, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013578129932284355, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012672603130340576, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012168290093541145, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009336811490356922, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009465517476201057, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008060792461037636, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006727331783622503, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10034485906362534, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09029138088226318, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08170045167207718, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07283684611320496, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04578864201903343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.039426978677511215, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06169300898909569, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05640179663896561, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04734969884157181, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.040106527507305145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0387626513838768, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.031297240406274796, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.026962151750922203, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0221172496676445, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020819082856178284, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015661245211958885, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.01159266009926796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010921081528067589, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010477237403392792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009653779678046703, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008168327622115612, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008046814240515232, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006415692623704672, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005235594697296619, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24213124811649323, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21821339428424835, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2071634978055954, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1855713576078415, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11226698756217957, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10168249905109406, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13868694007396698, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12387175858020782, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11603407561779022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09758996218442917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09323525428771973, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07112331688404083, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05924176797270775, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05397602170705795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.052673373371362686, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03557770699262619, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027692480012774467, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027064628899097443, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024697834625840187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023843659088015556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01836932636797428, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0173194520175457, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015816958621144295, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010893937200307846, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21891126036643982, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19252575933933258, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18333418667316437, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1541416198015213, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09970185905694962, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08962391316890717, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11745987087488174, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10634100437164307, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.1022590771317482, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0810094103217125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0750579684972763, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.060179226100444794, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05140291526913643, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04842131584882736, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0476909801363945, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03042450360953808, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026168329641222954, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.025834275409579277, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022707661613821983, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022239049896597862, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01697498932480812, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01780284009873867, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015833023935556412, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013641712255775928, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16974274814128876, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1576397866010666, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15273426473140717, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13758322596549988, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07991687208414078, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0749531015753746, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09189049899578094, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08452697098255157, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0814637616276741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07116850465536118, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06741846352815628, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.047037556767463684, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04070724919438362, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03862254321575165, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.038119200617074966, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023585014045238495, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020364830270409584, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020135320723056793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018652265891432762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018350310623645782, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012662669643759727, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013178868219256401, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011861506849527359, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009492394514381886, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24994423985481262, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23323480784893036, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22722339630126953, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20467650890350342, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11775363981723785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11129433661699295, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13347919285297394, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12282519042491913, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11973318457603455, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10509391874074936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09927257895469666, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06811151653528214, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05887181684374809, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.056592799723148346, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.056047797203063965, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03410836309194565, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02925204113125801, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028991710394620895, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026680003851652145, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02633582428097725, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018033284693956375, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01797405257821083, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017117325216531754, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012063993141055107, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24084794521331787, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2149728536605835, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20417684316635132, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18065732717514038, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11061535030603409, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10010451823472977, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13275066018104553, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12037846446037292, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1140921413898468, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09469675272703171, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0894174873828888, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0679241418838501, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05796901881694794, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05346028506755829, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.052342262119054794, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03433394432067871, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.028366530314087868, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.027926838025450706, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02530980296432972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.024607693776488304, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018797649070620537, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01884881593286991, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017207134515047073, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013633666560053825, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11898012459278107, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10904130339622498, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10259194672107697, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09162724763154984, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.055216457694768906, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.049976564943790436, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06944054365158081, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06304261088371277, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.056637782603502274, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04874257370829582, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04661942645907402, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03544297814369202, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030199015513062477, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026681313291192055, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.025787873193621635, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0177531149238348, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014028497971594334, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013586394488811493, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012755891308188438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012206205166876316, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00940590538084507, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00937342643737793, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008065241388976574, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006437685806304216, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09845162183046341, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08960966765880585, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08051550388336182, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0721944272518158, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04498283192515373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03872188553214073, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.061577025800943375, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05660485476255417, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04639069363474846, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03986898437142372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03867430239915848, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03129220008850098, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02691599912941456, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021762147545814514, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020365817472338676, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015664933249354362, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011447928845882416, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010695652104914188, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010449470020830631, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009556323289871216, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008222127333283424, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008061070926487446, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006385313346982002, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00523722730576992, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.25762081146240234, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.23437491059303284, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2241276204586029, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.2009229212999344, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11996977776288986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10995177924633026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14558720588684082, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13032738864421844, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.12346590310335159, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10502810776233673, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09969312697649002, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0744127556681633, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.062345292419195175, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05754049867391586, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.056374114006757736, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.037157148122787476, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02936357446014881, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0288124717772007, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02628367394208908, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.025530558079481125, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01906103454530239, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017994578927755356, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01665426604449749, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011084874160587788, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23390880227088928, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20507514476776123, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19537939131259918, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1688764989376068, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10748622566461563, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0961029902100563, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12650923430919647, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11417990922927856, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11032775044441223, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08780305087566376, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08223604410886765, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06515248119831085, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05553418770432472, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05248565226793289, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05174325779080391, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.032931700348854065, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028593143448233604, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.028252726420760155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02493148297071457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02447512373328209, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018365275114774704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019681213423609734, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.017141085118055344, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01533055491745472, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16721582412719727, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1554984748363495, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15080063045024872, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1359308958053589, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07865900546312332, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07388416677713394, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09020797163248062, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08306054025888443, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08016284555196762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07013854384422302, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06643325835466385, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0461675263941288, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039948657155036926, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.037968654185533524, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0374966561794281, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023162640631198883, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019983507692813873, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01976461336016655, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018311068415641785, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018021099269390106, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01245881523936987, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012861380353569984, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011713922955095768, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009195752441883087, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2476758062839508, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23140865564346313, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2255418300628662, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20332647860050201, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11674630641937256, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11042176187038422, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13247732818126678, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12168534845113754, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11870356649160385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10438426584005356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09879249334335327, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0676673874258995, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05839478597044945, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05615285038948059, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.055615611374378204, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03397317975759506, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02917337417602539, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02892449125647545, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026687441393733025, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026353878900408745, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018284840509295464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018138427287340164, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017421120777726173, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012445118278265, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23311735689640045, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2076624482870102, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19630104303359985, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1730528175830841, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10693265497684479, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09614399075508118, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1298157274723053, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11773039400577545, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11030489951372147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09122046083211899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08620867133140564, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06646713614463806, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05684866011142731, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05180526152253151, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05055014789104462, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03377906233072281, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027687029913067818, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.027169430628418922, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024696437641978264, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023922806605696678, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01877698302268982, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01873510703444481, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017082327976822853, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013743367046117783, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11854872852563858, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10841572284698486, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10046179592609406, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09023886173963547, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05487827584147453, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.048774316906929016, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07169825583696365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06485788524150848, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05639643967151642, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04859992861747742, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04681922867894173, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03658644109964371, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0310914758592844, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026596032083034515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02543656900525093, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01837174966931343, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014126441441476345, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013524781912565231, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012913748621940613, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012196262367069721, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00978377927094698, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009772364981472492, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008086668327450752, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006802357733249664, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09881409257650375, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08897601813077927, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07789318263530731, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.070052869617939, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04477224498987198, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.037287406623363495, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06474094837903976, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0588090680539608, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04653206840157509, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03966354578733444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03886891528964043, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03274199366569519, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.027980173006653786, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021712223067879677, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019974127411842346, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016390064731240273, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.01149689219892025, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010585146024823189, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010503850877285004, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00941682793200016, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008597178384661674, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008374550379812717, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0063454401679337025, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005462429951876402, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2461298555135727, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21992382407188416, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20805330574512482, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18626061081886292, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11295710504055023, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10164438933134079, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14066724479198456, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12526267766952515, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11714313924312592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09785057604312897, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09335468709468842, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07209640741348267, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.059879716485738754, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05427967384457588, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0528692789375782, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03602379187941551, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027825506404042244, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0271591953933239, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02473529428243637, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023811426013708115, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0185177493840456, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017418449744582176, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015673231333494186, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010838855989277363, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24238137900829315, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20899450778961182, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19675442576408386, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1653173565864563, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.1107429713010788, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09762032330036163, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1325366497039795, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12071909010410309, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.1151408925652504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08818399906158447, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08276010304689407, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0680793970823288, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.058215655386447906, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05368911474943161, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05257025361061096, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03424060717225075, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02869296818971634, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02822817862033844, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024386493489146233, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02366054616868496, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018568938598036766, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019295860081911087, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016854986548423767, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01423712819814682, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17265747487545013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16117773950099945, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15674972534179688, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14125491678714752, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08136560767889023, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07679685205221176, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09249325096607208, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08528515696525574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0827498733997345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07270661741495132, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06876303255558014, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.047241080552339554, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04096711054444313, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03920501098036766, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03878078609704971, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023665543645620346, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020491400733590126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020294655114412308, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018772924318909645, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01851019263267517, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012636941857635975, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012961121276021004, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01196750346571207, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009098355658352375, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2583028972148895, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24197198450565338, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23619398474693298, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21301616728305817, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1216617226600647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11543687433004379, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13711565732955933, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1262706071138382, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12356530874967575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10891588777303696, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1030225083231926, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06988944858312607, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06045341119170189, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05840389057993889, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.057912178337574005, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035045184195041656, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030069705098867416, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029843414202332497, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027459798380732536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027151891961693764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018595010042190552, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018252119421958923, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01778804138302803, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012067321687936783, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2350577712059021, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20968419313430786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19914455711841583, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1755095273256302, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10759389400482178, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0972568690776825, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12829364836215973, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11716990917921066, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11087852716445923, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09176313877105713, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08651281148195267, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06555679440498352, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.056185487657785416, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05177032947540283, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.050674762576818466, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03304686397314072, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027104662731289864, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02666134387254715, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0240059532225132, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023302894085645676, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017880242317914963, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017567284405231476, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016339559108018875, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012156721204519272, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11518840491771698, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10440337657928467, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09652786701917648, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08678264170885086, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05311517417430878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.046941127628088, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06947717815637589, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06283320486545563, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05478452146053314, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04681205376982689, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04526948556303978, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03548283502459526, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030085375532507896, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025719964876770973, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02458522655069828, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017809107899665833, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013598776422441006, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013016333803534508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012375726364552975, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01166654285043478, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009481118991971016, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009348488412797451, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007815476506948471, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006411343812942505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10049249231815338, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09003040939569473, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07938967645168304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07150077819824219, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04566473513841629, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.038076892495155334, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06531897932291031, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.059053726494312286, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04753059148788452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04028327018022537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03955555334687233, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03311343491077423, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028259437531232834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022116174921393394, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020416008308529854, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016574637964367867, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011630269698798656, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010735943913459778, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010577894747257233, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009502217173576355, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008593577891588211, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008359897881746292, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006333887577056885, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005347426515072584, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24432700872421265, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21561524271965027, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20160192251205444, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1802205890417099, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11120828986167908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09840673208236694, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14212246239185333, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.126351997256279, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11608325690031052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09582002460956573, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09164921939373016, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07323461771011353, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06038747355341911, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05339626595377922, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05160576105117798, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03661450371146202, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027446800842881203, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.026594925671815872, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02429073676466942, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023114195093512535, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01880202442407608, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017520003020763397, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015355206094682217, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010865279473364353, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.25094085931777954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2249126136302948, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.21582728624343872, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.18301206827163696, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11522213369607925, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10534267872571945, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13290242850780487, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1222134605050087, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11827018111944199, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09593755751848221, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08761908859014511, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06808842718601227, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05867809057235718, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.055454012006521225, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05465482175350189, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03418133035302162, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.029063107445836067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02870904467999935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.025264358147978783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024738000705838203, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018248021602630615, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018608219921588898, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016957921907305717, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013077059760689735, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17000453174114227, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1588810384273529, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15474747121334076, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13954930007457733, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08014548569917679, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07577024400234222, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09056904166936874, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08363639563322067, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08144202828407288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07165120542049408, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0677182525396347, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0461876280605793, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04014413431286812, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.038563840091228485, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03819018974900246, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023140516132116318, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020054582506418228, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019874650985002518, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01835748739540577, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018125034868717194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01228219736367464, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012509961612522602, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011681185103952885, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008623989298939705, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2676945626735687, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2509790062904358, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24530041217803955, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2212177813053131, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12628206610679626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11990468204021454, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14139871299266815, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13058073818683624, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12815208733081818, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11305198073387146, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1067144125699997, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0721227377653122, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06253568083047867, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.060589298605918884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.06011321768164635, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.036073703318834305, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.031140824779868126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030930375680327415, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.028455298393964767, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02816164866089821, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01898244023323059, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018808536231517792, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01819787174463272, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012382333166897297, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23583582043647766, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21054847538471222, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19982178509235382, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17685994505882263, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10773136466741562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09739874303340912, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1286163330078125, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11758154630661011, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11106949299573898, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09213621914386749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08703289926052094, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06519761681556702, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05625410005450249, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0517919547855854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.050681792199611664, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03284970670938492, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02703143283724785, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026576319709420204, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023962832987308502, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023246001452207565, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017673274502158165, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01741817407310009, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016134561970829964, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011909054592251778, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11552730947732925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10419560968875885, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09591171145439148, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08617481589317322, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05310496687889099, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04663751646876335, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06981663405895233, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06338843703269958, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05501445010304451, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04671679809689522, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04521799832582474, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.035712774842977524, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030380437150597572, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025733599439263344, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024528296664357185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017955657094717026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01363803818821907, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013033771887421608, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012393384240567684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011641102842986584, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009598671458661556, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009447681717574596, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007882049307227135, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006501982919871807, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10655055940151215, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0953608825802803, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08398163318634033, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0755782425403595, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04844515025615692, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04040726274251938, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06898686289787292, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06282635033130646, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.050449173897504807, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04270404949784279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.041835565119981766, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03504302352666855, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03002963401377201, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.023534249514341354, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021736253052949905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017540166154503822, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012475477531552315, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01155701745301485, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011356854811310768, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010246817022562027, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009183844551444054, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009049501270055771, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006854380946606398, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006000937893986702, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23727688193321228, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20775650441646576, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19274887442588806, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17208795249462128, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10743466019630432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09392289817333221, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13996796309947968, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12410634011030197, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11259948462247849, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09201952069997787, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08842203766107559, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07168793678283691, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05918137729167938, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05160987749695778, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04968104138970375, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03582616522908211, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026524154469370842, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.025594409555196762, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023391615599393845, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02211669832468033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01834653504192829, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017111968249082565, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01471620798110962, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010591564700007439, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23808199167251587, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20433130860328674, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19221583008766174, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15164814889431, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10970434546470642, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09590363502502441, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12984764575958252, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11856229603290558, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11324107646942139, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08364018052816391, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0746816024184227, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06645502895116806, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05708029493689537, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.053108714520931244, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.052150625735521317, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03336181491613388, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028149526566267014, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02769814245402813, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02312687784433365, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022469379007816315, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018041472882032394, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018570221960544586, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016475172713398933, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013439727015793324, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18328043818473816, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17124249041080475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16688227653503418, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15049171447753906, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08634784817695618, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0816202238202095, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09749169647693634, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09008637815713882, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08775034546852112, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07715896517038345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07285858690738678, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.049672991037368774, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0431678481400013, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04149121046066284, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0410873144865036, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024823620915412903, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02146717719733715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021275747567415237, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01960975117981434, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01935691200196743, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013053126633167267, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013232504017651081, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012400749139487743, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008935395628213882, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.27227723598480225, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25510284304618835, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24935078620910645, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2248653769493103, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12840747833251953, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.12191221117973328, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14366209506988525, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13280387222766876, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.13029882311820984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11495229601860046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10842858254909515, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07317006587982178, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06354879587888718, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.06155288964509964, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.061081063002347946, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.036533452570438385, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.03153664991259575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03131576254963875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.028771573677659035, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02847226895391941, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019034620374441147, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018894633278250694, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.018223468214273453, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012209326028823853, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24286571145057678, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21703548729419708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20620682835578918, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18317070603370667, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11110695451498032, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.100651815533638, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.132823184132576, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12102832645177841, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11456464231014252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0953705683350563, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09030906111001968, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06738130748271942, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05795398727059364, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.053456082940101624, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0523516945540905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03394993394613266, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027971072122454643, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.027519995346665382, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0248865969479084, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.024170244112610817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018424000591039658, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018085027113556862, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016869356855750084, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012490163557231426, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11722759157419205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10623858124017715, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09839460998773575, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08842223882675171, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.054029107093811035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04780511558055878, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07030382007360458, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06369081884622574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05582535266876221, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04757915437221527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04593568295240402, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.035854797810316086, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03049379214644432, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026100607588887215, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024959120899438858, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01797451823949814, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013713791966438293, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013150094076991081, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012446955777704716, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011743409559130669, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009527255780994892, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009306300431489944, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007886632345616817, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006267756223678589, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10522744804620743, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.093859001994133, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08396714925765991, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07551541924476624, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04768730700016022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04055316001176834, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06570763140916824, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06008819490671158, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.049695659428834915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04181700572371483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.040739573538303375, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0333201140165329, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02852926403284073, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02308330312371254, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021601412445306778, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016695184633135796, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012134098447859287, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.011385291814804077, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010993000119924545, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010057357139885426, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008722798898816109, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008539504371583462, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.00671209255233407, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005584067199379206, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2505713105201721, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22409150004386902, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21187259256839752, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18952220678329468, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11504141241312027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10344011336565018, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14212465286254883, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12801317870616913, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.119211845099926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09964358806610107, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09487242996692657, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0725075900554657, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06114663556218147, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05523240193724632, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05375022068619728, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.036245834082365036, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.028297962620854378, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027586786076426506, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02515801042318344, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02419336512684822, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018588077276945114, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017730500549077988, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01583705097436905, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010997951962053776, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23431502282619476, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19980408251285553, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18752767145633698, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1504669189453125, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.1061583086848259, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0927535817027092, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12647828459739685, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11567464470863342, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11059132218360901, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08212630450725555, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07410670071840286, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06478898227214813, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05564766377210617, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.051312752068042755, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.050268806517124176, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.032512761652469635, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.027152834460139275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026679620146751404, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02252589724957943, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.021804826334118843, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017569787800312042, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01793483830988407, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015905329957604408, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012844252400100231, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1864737570285797, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17425401508808136, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16978687047958374, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15296143293380737, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08788295090198517, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08307019621133804, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09934250265359879, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09163231402635574, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08929307013750076, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07845745235681534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07409363240003586, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05061056464910507, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04394172504544258, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04226025938987732, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04185657575726509, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02538587898015976, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.021936051547527313, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021741412580013275, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02003847062587738, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.019787028431892395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013548201881349087, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013600715436041355, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012910464778542519, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00929265096783638, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.27742332220077515, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25980043411254883, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2538760006427765, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.22880108654499054, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.13091041147708893, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.12419064342975616, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14646278321743011, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13537904620170593, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1328427940607071, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1170416995882988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.11035047471523285, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07467494159936905, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06480401009321213, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.06278583407402039, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.06230432167649269, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03732515871524811, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.032211896032094955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03198086470365524, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029361478984355927, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.029056359082460403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01954587921500206, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019353805109858513, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.018722843378782272, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012589715421199799, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2454107254743576, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21947301924228668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20831260085105896, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18553315103054047, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11232510209083557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.1015343889594078, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1338222771883011, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12289125472307205, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11583753675222397, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09664605557918549, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09175200015306473, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06818974018096924, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05893415957689285, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0539836548268795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.052794862538576126, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03431439399719238, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02818761207163334, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.027693698182702065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0251067616045475, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.024331659078598022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01837281696498394, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018190527334809303, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01665792241692543, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012416159734129906, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1241627112030983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.112869031727314, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10462567210197449, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09409871697425842, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05745141580700874, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.050884053111076355, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07425661385059357, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0676986500620842, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05916754528880119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05070198327302933, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04890011250972748, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03791116923093796, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03245215490460396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.027805831283330917, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02662384882569313, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018976397812366486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014694819226861, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.014085984788835049, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013377569615840912, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012633836828172207, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010022968053817749, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010062643326818943, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008312304504215717, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006900900509208441, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.11318586021661758, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10221952944993973, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09119313955307007, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.08214624971151352, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.05182155221700668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.043943025171756744, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07214067131280899, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0659002885222435, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.053702421486377716, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.045834098011255264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04471106454730034, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03668273985385895, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.031486645340919495, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.025086769834160805, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.023347899317741394, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.018337983638048172, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.013187086209654808, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.012282589450478554, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011997872963547707, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010918723419308662, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009579645469784737, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009343239478766918, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.007319021038711071, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006051225587725639, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.26569950580596924, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.24019378423690796, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.22911827266216278, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.2051757127046585, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.12285816669464111, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.11218246817588806, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14903132617473602, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13410361111164093, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1268000453710556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10720721632242203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.10190845280885696, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07592051476240158, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06404556334018707, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0589296892285347, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05762479081749916, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03790372237563133, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.03011721558868885, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02952324040234089, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.026896117255091667, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.026071537286043167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019452817738056183, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018526174128055573, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01698818802833557, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011472544632852077, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2262723743915558, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19808819890022278, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18632985651493073, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1577632874250412, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10433372855186462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09229274839162827, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12504759430885315, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11444795876741409, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10728998482227325, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08451051265001297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07707363367080688, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06387560814619064, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05489858239889145, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05025504529476166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04912084341049194, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03196795657277107, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026199325919151306, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02561371773481369, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02223464846611023, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02146647870540619, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016936006024479866, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01685498096048832, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015147263184189796, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011416852474212646, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18103410303592682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16912841796875, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1647355854511261, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14833888411521912, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08540444821119308, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08068081736564636, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0963849350810051, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08902064710855484, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.086787149310112, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0761972963809967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0718972310423851, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04913345351815224, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04268714040517807, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04104306176304817, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04065873846411705, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024590270593762398, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02126956731081009, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021089255809783936, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019413353875279427, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01916654035449028, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013007830828428268, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013143081218004227, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012374037876725197, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008921355940401554, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2800564169883728, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.262245774269104, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2561470568180084, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.23087413609027863, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.13226790726184845, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.12544187903404236, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14834706485271454, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13675448298454285, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.13420453667640686, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11811204999685287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.11153042316436768, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07556840777397156, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06550215929746628, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.06346224993467331, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.06298504769802094, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.037880007177591324, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.03264417499303818, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03241987153887749, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029766032472252846, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02945791371166706, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.02003578282892704, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019747931510210037, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.019211582839488983, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.013023331761360168, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24401183426380157, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21803875267505646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20670196413993835, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18496562540531158, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11152868717908859, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10069890320301056, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13369432091712952, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12238267064094543, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11504734307527542, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0961916446685791, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09153638035058975, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06777217984199524, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.058521028608083725, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05360942706465721, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05239548161625862, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03417545557022095, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027938449755311012, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.027434276416897774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024909019470214844, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02413181960582733, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01841077022254467, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01798810251057148, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016741828992962837, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01219868753105402, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1293337643146515, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11795935034751892, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10979912430047989, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09875765442848206, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.06002511829137802, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.053444113582372665, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07712603360414505, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07019113004207611, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.06170414760708809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05298873037099838, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.05103473737835884, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.039298929274082184, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03359869495034218, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.028984108939766884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.027814386412501335, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.019678009673953056, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.015196782536804676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01458318904042244, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013809246942400932, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.013061321340501308, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010313533246517181, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.010220698080956936, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008589491248130798, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006831396371126175, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1127459704875946, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.10212170332670212, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09173985570669174, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.08260124176740646, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.051777858287096024, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04425615444779396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07113341987133026, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06511687487363815, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.053492605686187744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04581258445978165, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04449676722288132, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03603503108024597, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.031143948435783386, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.025003187358379364, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.023375192657113075, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017972802743315697, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.013102966360747814, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.012239107862114906, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011919460259377956, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010892453603446484, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00935520138591528, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009176813066005707, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.007229270413517952, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005880466662347317, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2666322886943817, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2416657954454422, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2307581901550293, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.20654255151748657, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.12346450984477997, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.11281318962574005, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.150150865316391, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13474370539188385, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.12713971734046936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10793328285217285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.10263869166374207, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07657497376203537, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06438089162111282, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05922877416014671, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.057965103536844254, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03829081356525421, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.030219022184610367, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.029618006199598312, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.027025915682315826, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02617654576897621, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019660385325551033, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01854242943227291, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01716424897313118, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011352242901921272, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24777323007583618, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2075507789850235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19316469132900238, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1637071818113327, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11277371644973755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09596659988164902, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1368575096130371, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12357045710086823, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11778807640075684, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08831676840782166, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08108189702033997, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06970827281475067, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05940061807632446, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05440586060285568, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05318576470017433, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03516659140586853, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028427431359887123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0278625525534153, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023564239963889122, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02269790507853031, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01906929910182953, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018372243270277977, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016971010714769363, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01252079289406538, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1807900369167328, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16874851286411285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16438192129135132, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1479242444038391, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08534739166498184, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08060300350189209, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09644873440265656, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08898407220840454, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08674110472202301, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07606247812509537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07176370918750763, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04923088476061821, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04270581528544426, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.041052114218473434, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04066073149442673, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02468099072575569, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.021316198632121086, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02113349735736847, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019444778561592102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.019201839342713356, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013164645060896873, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013238213025033474, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012536531314253807, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009071124717593193, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.28063714504241943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2625964283943176, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2565779387950897, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.23101994395256042, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1326003074645996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.12573795020580292, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14825519919395447, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1371077299118042, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.13457924127578735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11832165718078613, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.11155922710895538, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07556156814098358, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06561564654111862, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0635610818862915, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0630761906504631, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03772154077887535, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.03251131996512413, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03229180723428726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029576322063803673, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.029267320409417152, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.0196070559322834, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019400816410779953, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01876073330640793, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012437397614121437, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2433633804321289, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.216870978474617, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2049555480480194, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1840868443250656, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11098650097846985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09991084039211273, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13368047773838043, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12271743267774582, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11476273834705353, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09597690403461456, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.09155342727899551, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06800806522369385, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05871419608592987, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.053361374884843826, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05203830450773239, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03408975154161453, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027833692729473114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02728026546537876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024876106530427933, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02402782253921032, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01810338906943798, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018028289079666138, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016254035755991936, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012211584486067295, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12246479094028473, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1111752912402153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.102445088326931, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09210270643234253, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05668160319328308, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04984765872359276, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07411085814237595, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06755362451076508, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0584319569170475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04993508756160736, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04822860658168793, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0377504825592041, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03240986540913582, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.027405796572566032, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.026105012744665146, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018907928839325905, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014427020214498043, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013753671199083328, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013104255311191082, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012285803444683552, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009878460317850113, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009886583313345909, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00802772119641304, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006628722418099642, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10819445550441742, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09736620634794235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0856194868683815, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07706958055496216, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.049363430589437485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04114094376564026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0705283135175705, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06435130536556244, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.05118919909000397, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04360338672995567, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04263044148683548, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03566460311412811, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.030667457729578018, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.023914262652397156, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02204272337257862, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017821330577135086, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012589157558977604, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01163054071366787, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011464073322713375, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010313502512872219, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009351968765258789, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009069819003343582, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0070050484500825405, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00586701137945056, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.25552481412887573, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22747355699539185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2137981653213501, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1912512183189392, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11696407198905945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10435730963945389, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14810042083263397, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13196389377117157, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.12165071070194244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10124514251947403, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09692727029323578, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0758289322257042, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06304088979959488, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05618290975689888, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05444958433508873, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03794492036104202, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02877121977508068, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027961887419223785, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025554807856678963, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02443053387105465, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019491178914904594, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01817990094423294, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.016131488606333733, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011203019879758358, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24528273940086365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2137083113193512, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20320017635822296, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17381349205970764, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11304691433906555, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.1006697490811348, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13109540939331055, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1207771822810173, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11669883131980896, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09203135967254639, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08448950946331024, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06696896255016327, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05785960331559181, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0543314553797245, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05348426103591919, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.033501338213682175, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028175588697195053, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.027796223759651184, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0239836685359478, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.023396072909235954, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01766018383204937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01769085042178631, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01627090387046337, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011892636306583881, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18085932731628418, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1688411682844162, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16444335877895355, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14792028069496155, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08541633933782578, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08062781393527985, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09644738584756851, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08910485357046127, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08682208508253098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07609419524669647, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07172048836946487, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.049167729914188385, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04272216185927391, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.041059352457523346, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.040661755949258804, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0245896615087986, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02123960666358471, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021057600155472755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019354654476046562, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01910659670829773, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012944447807967663, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013089392334222794, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012296738103032112, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008830351755023003, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2778908908367157, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.25994086265563965, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2539275586605072, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.22857919335365295, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.13143789768218994, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.12454156577587128, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14733174443244934, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13597463071346283, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.13341864943504333, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1172565445303917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.11055105179548264, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07515557110309601, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06513753533363342, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.06306926161050797, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.06258668750524521, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0375736802816391, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.032389748841524124, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03216482698917389, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.029484234750270844, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02917364053428173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.019743522629141808, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.019517097622156143, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.018901638686656952, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012747151777148247, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2379239946603775, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21114858984947205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19860956072807312, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.178646981716156, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10852931439876556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0969037413597107, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13207842409610748, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.12100120633840561, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11240886151790619, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09366316348314285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08958699554204941, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06727823615074158, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05817051976919174, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05237240344285965, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.050925660878419876, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03401077911257744, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02763601951301098, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02703426405787468, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02475467324256897, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023845063522458076, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018500899896025658, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018385207280516624, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01655835285782814, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012909249402582645, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12513479590415955, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11360963433980942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10616281628608704, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0952988937497139, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0578555166721344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.051758117973804474, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.073453888297081, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06696226447820663, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05975109711289406, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05104561150074005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.048917535692453384, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03752656280994415, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03213750571012497, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.027966370806097984, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02690415270626545, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018810207024216652, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014698194339871407, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01417150441557169, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013349450193345547, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012686093337833881, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009873742237687111, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009881898760795593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008273421786725521, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006702505983412266, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10991694033145905, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09994732588529587, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.09230270981788635, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0828406810760498, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.050679974257946014, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0447305403649807, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06605662405490875, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06032337248325348, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0522315576672554, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.044747840613126755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04303036257624626, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03345755487680435, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028834683820605278, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.024471549317240715, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02334551326930523, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016804955899715424, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012817196547985077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.012232696637511253, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01164473220705986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010927787981927395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008778239600360394, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008701101876795292, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.007181114051491022, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005751131102442741, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2424110323190689, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21967419981956482, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2094990462064743, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1875714510679245, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1123223751783371, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10243473947048187, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13516172766685486, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12297115474939346, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.115749292075634, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09810567647218704, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09317072480916977, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06882341206073761, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05870778486132622, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05387823283672333, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05270183086395264, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.034387726336717606, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027505651116371155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02693275362253189, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024547766894102097, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02376103214919567, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017650891095399857, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016875505447387695, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015564458444714546, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010310988873243332, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21215641498565674, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18799123167991638, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17913496494293213, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15256795287132263, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09745427221059799, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08746088296175003, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11504745483398438, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10529779642820358, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10112577676773071, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08175421506166458, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07409580796957016, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05916059389710426, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05070021376013756, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04718313366174698, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04629824310541153, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029831700026988983, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.024971336126327515, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02461651898920536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021925222128629684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02137770690023899, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016343284398317337, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01645473763346672, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014973432756960392, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011885048821568489, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17262928187847137, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16086570918560028, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15646861493587494, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14069975912570953, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08151521533727646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07680927217006683, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09233050793409348, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08527365326881409, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0829312801361084, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07251537591218948, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06840483099222183, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04711994528770447, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.040940187871456146, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.039233893156051636, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03882625699043274, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023578528314828873, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020365476608276367, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02017444744706154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018552415072917938, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01829635351896286, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012459095567464828, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012659029103815556, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01179832685738802, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008649793453514576, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.26558902859687805, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24825164675712585, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24225975573062897, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21799533069133759, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12556232511997223, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11886769533157349, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.14076833426952362, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.13010632991790771, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12750758230686188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1119152382016182, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.105362668633461, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07183544337749481, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0623270720243454, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.06025492027401924, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.059770774096250534, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.035912904888391495, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030968667939305305, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030739400535821915, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.028177917003631592, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02785961702466011, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.018802829086780548, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01871333457529545, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017955366522073746, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012253517284989357, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23444649577140808, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2065620869398117, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.193372443318367, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17409352958202362, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10660373419523239, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0943867564201355, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13138645887374878, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11971812695264816, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11073200404644012, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09146349132061005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08781032264232635, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06683053821325302, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.057402946054935455, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05141787603497505, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.049921490252017975, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03370046615600586, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027073830366134644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02644667588174343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024129007011651993, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023173406720161438, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018294326961040497, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01799483224749565, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016251351684331894, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01249727513641119, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11710581183433533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10628634691238403, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0968172699213028, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08710862696170807, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.054126132279634476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04697806015610695, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07336757332086563, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06618529558181763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05586244910955429, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.047810591757297516, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04655846580862999, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03751825913786888, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0317385271191597, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026238225400447845, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024787846952676773, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018796654418110847, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013894524425268173, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013140714727342129, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012663307599723339, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011759336106479168, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009892056696116924, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009741025045514107, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007794552482664585, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006592501886188984, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10092989355325699, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0907038226723671, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07768192887306213, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07009823620319366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.045850880444049835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03709466755390167, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0689406767487526, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06246013566851616, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.047701504081487656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.040752802044153214, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04025154933333397, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.034683968871831894, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.029743054881691933, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022281046956777573, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02015087939798832, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017422137781977654, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011827236972749233, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01070250105112791, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01083723921328783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00950342882424593, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009111988358199596, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008825616911053658, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006440788507461548, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005682140588760376, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24633540213108063, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2186485230922699, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20428875088691711, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18237800896167755, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11258530616760254, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09962042421102524, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1449311524629593, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12887296080589294, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11718541383743286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09732341021299362, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09335491806268692, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0745684802532196, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06159747391939163, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05417822301387787, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.052305564284324646, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03738074004650116, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02799317054450512, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027087032794952393, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024899819865822792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023665083572268486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01928860880434513, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018120409920811653, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015710290521383286, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011529292911291122, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23726792633533478, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20328649878501892, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19123169779777527, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1614888459444046, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10754002630710602, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09487514197826385, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12853914499282837, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11760629713535309, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.1125027984380722, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08567941188812256, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07940115034580231, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06586778908967972, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05651110038161278, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05197310820221901, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05085616558790207, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03297564014792442, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.027417395263910294, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02694646269083023, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023164521902799606, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022409973666071892, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017653055489063263, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0180587749928236, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015882741659879684, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012811078689992428, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1575218141078949, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14647383987903595, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14214563369750977, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12783238291740417, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07433261722326279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06981103122234344, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0847540870308876, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07822158187627792, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07567337900400162, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06605373322963715, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0623454786837101, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04333963990211487, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03754950687289238, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03577565401792526, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03535660356283188, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021696439012885094, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018593067303299904, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018395692110061646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016933096572756767, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01666838489472866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011519580148160458, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01162970345467329, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010849019512534142, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00796338077634573, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24131251871585846, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22506707906723022, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21933835744857788, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19734054803848267, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11401266604661942, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10768240690231323, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.128557026386261, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11870396137237549, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11591411381959915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10150285810232162, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09572955220937729, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0656711608171463, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05691489577293396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05480261892080307, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.054298222064971924, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032875943928956985, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028340864926576614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02810489758849144, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0258041862398386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02548789419233799, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017368486151099205, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01741328462958336, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016509998589754105, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01172893401235342, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21122318506240845, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18474987149238586, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17216378450393677, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15405410528182983, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09599380940198898, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08427053689956665, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11855010688304901, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10843638330698013, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09984773397445679, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08150173723697662, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07803384214639664, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06062145531177521, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.052170008420944214, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04645353555679321, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0450274832546711, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0307806096971035, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02473585493862629, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024141354486346245, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02195768803358078, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021054288372397423, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01702031120657921, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01680641807615757, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015120682306587696, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012048527598381042, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11466183513402939, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10402652621269226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09531863033771515, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08577825874090195, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05272402986884117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.046203870326280594, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07185955345630646, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06386134028434753, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.054611872881650925, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04672742635011673, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04539082944393158, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03668798878788948, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030579470098018646, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025513045489788055, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02418706752359867, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018393106758594513, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01344738807529211, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012767400592565536, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012274505570530891, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011434193700551987, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00962857250124216, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009311930276453495, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007563363295048475, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006221093703061342, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09905854612588882, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09043660759925842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08086809515953064, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07278987765312195, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04549376666545868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.038891833275556564, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06393351405858994, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.058045417070388794, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.046956729143857956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.040542490780353546, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03954899683594704, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03252886235713959, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02766520343720913, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022008540108799934, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020458895713090897, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01625985838472843, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011571782641112804, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010766211897134781, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01061180979013443, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009660464711487293, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008494345471262932, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008232136256992817, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006419568322598934, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005344074219465256, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.26449671387672424, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.24120378494262695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.23064082860946655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.20700755715370178, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.12309194356203079, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.11276880651712418, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14911696314811707, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13463330268859863, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1268158107995987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10843921452760696, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.10315129160881042, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.076297827064991, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06454113870859146, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.059123486280441284, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05780420079827309, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03823370113968849, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.030336637049913406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.029706543311476707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.027324268594384193, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02645665779709816, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019684968516230583, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018852630630135536, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01715739816427231, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011844594962894917, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.202353835105896, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1700456440448761, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15925955772399902, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1387900859117508, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09296418726444244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07902856916189194, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1094902977347374, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09886474907398224, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09614522755146027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07345541566610336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0675060823559761, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05609676614403725, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04750115051865578, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0447879284620285, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04414009675383568, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028150297701358795, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023486020043492317, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023209119215607643, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019673068076372147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019230982288718224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015015346929430962, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015073792077600956, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013822797685861588, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010608711279928684, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15192033350467682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14113371074199677, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13685549795627594, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12301204353570938, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07158815860748291, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0671602189540863, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08181937783956528, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07552071660757065, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07295078784227371, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06359505653381348, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.060028232634067535, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04185234382748604, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03629671782255173, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03452576696872711, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03410082682967186, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02095421403646469, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018066812306642532, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0178608950227499, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01647539623081684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01620539464056492, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011183860711753368, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01148397009819746, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010508331470191479, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00805638637393713, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.20791570842266083, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1937105804681778, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.18859758973121643, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1696448028087616, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09886440634727478, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09325093030929565, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1118612140417099, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10319668799638748, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1005546972155571, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08811290562152863, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08315788954496384, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.057815562933683395, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05049172043800354, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04855813458561897, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.048096366226673126, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029130853712558746, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02682705782353878, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026622595265507698, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024849403649568558, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024581553414463997, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016464726999402046, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018757769837975502, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01575336419045925, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.015110904350876808, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21124231815338135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1856469064950943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17407317459583282, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1547044962644577, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09646738320589066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08544792979955673, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11802967637777328, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10741224884986877, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09979185461997986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08156541734933853, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07744602859020233, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05980595573782921, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05150722339749336, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04662354290485382, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04540295898914337, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030103519558906555, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02472141571342945, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02421344816684723, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021861162036657333, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021085696294903755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016239922493696213, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016551533713936806, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014542278833687305, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011816390790045261, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11111422628164291, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.100423164665699, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09021207690238953, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08112946152687073, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.051144927740097046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.043621864169836044, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07119418680667877, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06415434181690216, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05296526849269867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.045141689479351044, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04406900331377983, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03635929152369499, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03082970716059208, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024855736643075943, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023250840604305267, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0182390995323658, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013247016817331314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01242312416434288, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012088077142834663, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01110086403787136, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009645048528909683, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00951217208057642, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007454326841980219, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006490342319011688, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.0997641459107399, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08965002000331879, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07643520832061768, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06897442787885666, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.045317426323890686, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03641427308320999, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06912116706371307, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06212440878152847, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04726794362068176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.040290772914886475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.039964817464351654, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.035185396671295166, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02972046472132206, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022082842886447906, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019873082637786865, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017681095749139786, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011773938313126564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010607392527163029, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01080174371600151, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009432617574930191, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009190182201564312, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008888771757483482, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006364631466567516, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0057808575220406055, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24076354503631592, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21253551542758942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1968216449022293, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1760602444410324, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10958302021026611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09584890305995941, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14357851445674896, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12799587845802307, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11446478217840195, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09474393725395203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09132708609104156, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07429677248001099, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.061220429837703705, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.052771180868148804, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05059356242418289, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03735646978020668, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027372973039746284, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.026311039924621582, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02440611459314823, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.022978655993938446, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.019319739192724228, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018101543188095093, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015322262421250343, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011577470228075981, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18777845799922943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16492204368114471, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15765102207660675, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13390497863292694, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08673478662967682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07828263938426971, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09997033327817917, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09116440266370773, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08894402533769608, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06992544233798981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06565967202186584, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.051282335072755814, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04395899176597595, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.041958317160606384, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04146105423569679, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02572617307305336, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.022252453491091728, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022040043026208878, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01902526430785656, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018707895651459694, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013903728686273098, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01454504020512104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013112739659845829, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010705122724175453, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14584854245185852, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13587071001529694, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13184085488319397, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1185721680521965, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06927572935819626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06515452265739441, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07891777902841568, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07278762012720108, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07046633213758469, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06166825070977211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05840639770030975, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04085411876440048, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03560181334614754, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03404185548424721, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0336642824113369, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.020627278834581375, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01879679411649704, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018623944371938705, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017394879832863808, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01717248372733593, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01166438776999712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013149200938642025, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011096258647739887, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010554206557571888, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1534024327993393, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1429966241121292, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13915908336639404, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12517192959785461, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07289504259824753, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06876230239868164, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08253250271081924, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0760783925652504, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07409973442554474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06493630260229111, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06135579198598862, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0426766499876976, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.037098199129104614, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.035657189786434174, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.035316064953804016, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021516086533665657, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019493723288178444, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019338328391313553, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018008558079600334, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01780320517718792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012096510268747807, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013397304341197014, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01155620813369751, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010586786083877087, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.17624680697917938, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1556851863861084, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.14651426672935486, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.12848851084709167, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08133043348789215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07274995744228363, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09956934303045273, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.08892495185136795, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.08333875983953476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.06805051118135452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.06440556049346924, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.050443537533283234, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0432964451611042, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.03993639349937439, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.03909844905138016, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02587968111038208, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02214411459863186, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.021786091849207878, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.019753821194171906, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.019242985174059868, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015014564618468285, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015801306813955307, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.013951889239251614, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012541103176772594, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10668952018022537, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09631432592868805, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08658826351165771, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07774786651134491, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0490027517080307, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.041841115802526474, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0682082250714302, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06141263246536255, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.050785161554813385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04317626357078552, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042137838900089264, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03480510413646698, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029466789215803146, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02378767542541027, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.022254960611462593, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017438383772969246, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01264653168618679, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011865276843309402, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011521902866661549, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01058002095669508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009196925908327103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009049064479768276, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007080395240336657, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006136737298220396, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09764493256807327, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08765844255685806, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07522080093622208, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06781651079654694, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04435543715953827, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.035944271832704544, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06752779334783554, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06018967926502228, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04620876535773277, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.039277538657188416, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03890053927898407, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.034184012562036514, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028705507516860962, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021604737266898155, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019556274637579918, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017156334593892097, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011499391868710518, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0104361018165946, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010520046576857567, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009263625368475914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00899236835539341, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008606402203440666, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0063304477371275425, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005623134318739176, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24342438578605652, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21234673261642456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1948210895061493, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17408804595470428, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11026731878519058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09508946537971497, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.145784392952919, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.13027220964431763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11579670757055283, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09445050358772278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09143931418657303, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07514099031686783, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06223783642053604, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.053104218095541, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05076821520924568, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03777423873543739, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027465814724564552, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02631095051765442, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02426310069859028, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.022701416164636612, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0194235946983099, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.018181342631578445, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015246802009642124, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011409844271838665, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12221070379018784, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10776066780090332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10339236259460449, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08795098960399628, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05418452247977257, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0488305538892746, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0645606741309166, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.056942399591207504, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.055684663355350494, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.044327422976493835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04122401401400566, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0321931466460228, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028973694890737534, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.027799854055047035, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02752809226512909, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016920648515224457, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.017067795619368553, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.016953542828559875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01551627553999424, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01536094956099987, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01032547652721405, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013535035774111748, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00987126212567091, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012084778398275375, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.13073022663593292, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1221928521990776, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.11899043619632721, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.10697983205318451, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06174309179186821, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.05829346179962158, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06991492211818695, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0643988847732544, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06268975883722305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05494863912463188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0518859401345253, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.035740144550800323, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.030885331332683563, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.029675669968128204, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.029395217075943947, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.017866225913167, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.015365942381322384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.015227324329316616, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.013990290462970734, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.013811969198286533, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.00942854955792427, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009485801681876183, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.008950877003371716, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0064184460788965225, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.09030161798000336, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.08415258675813675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.08189533650875092, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.07354327291250229, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.04258973151445389, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.040164582431316376, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.04833756759762764, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.044453397393226624, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.04328435659408569, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0378413163125515, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.03574034571647644, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.024737583473324776, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.021405357867479324, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.020568322390317917, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.020369326695799828, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.012424350716173649, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.010851106606423855, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.010755963623523712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.009922008961439133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.009798946790397167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.0067443810403347015, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.00698293000459671, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.006428421474993229, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0050632404163479805, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.11115241050720215, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.10097404569387436, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.0951182022690773, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.08253265172243118, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.051608629524707794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.04685458168387413, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.06531306356191635, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.05796603113412857, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.05291159451007843, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.044420696794986725, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.04217885807156563, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.03351811692118645, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.028362389653921127, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.02555226720869541, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0248445812612772, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.01723642274737358, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.014497659169137478, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.014184728264808655, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.013245167210698128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.012828058563172817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.010149398818612099, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01076665148139, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.009240161627531052, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.008758008480072021, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + } + ], + "last_module_idx": 66, + "base_perplexity": 2.5938224395851686 +} \ No newline at end of file